gcc/optabs-tree.c
commit bfaa08b7ba by Richard Sandiford: Add support for SVE gather loads
This patch adds support for SVE gather loads.  It uses basically the
same analysis code as the AVX gather support, but after that there
are two major differences:

- It uses new internal functions rather than target built-ins.
  The interface is:

     IFN_GATHER_LOAD (base, offsets, scale)
     IFN_MASK_GATHER_LOAD (base, offsets, scale, mask)

  which should be reasonably generic.  One of the advantages of
  using internal functions is that other passes can understand what
  the functions do, but a more immediate advantage is that we can
  query the underlying target pattern to see which scales it supports.

- It uses pattern recognition to convert the offset to the right width,
  if it was originally narrower than that width.  This avoids having to
  do a widening operation as part of the gather expansion itself (see
  the example below).
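
  As an illustrative sketch (not part of the original message; the
  exact GIMPLE depends on the target and flags), a loop such as:

     void
     f (float *restrict dst, float *restrict src, int *restrict idx, int n)
     {
       for (int i = 0; i < n; ++i)
         dst[i] = src[idx[i]];
     }

  can now be vectorized for SVE by loading a vector of idx values,
  widening them via the new pattern recognizer, and emitting roughly:

     vect = IFN_MASK_GATHER_LOAD (src, vect_offsets, 4, loop_mask)

  where 4 is the scale, i.e. sizeof (float), applied to each offset.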

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* doc/md.texi (gather_load@var{m}): Document.
	(mask_gather_load@var{m}): Likewise.
	* genopinit.c (main): Add supports_vec_gather_load and
	supports_vec_gather_load_cached to target_optabs.
	* optabs-tree.c (init_tree_optimization_optabs): Use
	ggc_cleared_alloc to allocate target_optabs.
	* optabs.def (gather_load_optab, mask_gather_load_optab): New optabs.
	* internal-fn.def (GATHER_LOAD, MASK_GATHER_LOAD): New internal
	functions.
	* internal-fn.h (internal_load_fn_p): Declare.
	(internal_gather_scatter_fn_p): Likewise.
	(internal_fn_mask_index): Likewise.
	(internal_gather_scatter_fn_supported_p): Likewise.
	* internal-fn.c (gather_load_direct): New macro.
	(expand_gather_load_optab_fn): New function.
	(direct_gather_load_optab_supported_p): New macro.
	(direct_internal_fn_optab): New function.
	(internal_load_fn_p): Likewise.
	(internal_gather_scatter_fn_p): Likewise.
	(internal_fn_mask_index): Likewise.
	(internal_gather_scatter_fn_supported_p): Likewise.
	* optabs-query.c (supports_at_least_one_mode_p): New function.
	(supports_vec_gather_load_p): Likewise.
	* optabs-query.h (supports_vec_gather_load_p): Declare.
	* tree-vectorizer.h (gather_scatter_info): Add ifn, element_type
	and memory_type fields.
	(NUM_PATTERNS): Bump to 15.
	* tree-vect-data-refs.c: Include internal-fn.h.
	(vect_gather_scatter_fn_p): New function.
	(vect_describe_gather_scatter_call): Likewise.
	(vect_check_gather_scatter): Try using internal functions for
	gather loads.  Recognize existing calls to a gather load function.
	(vect_analyze_data_refs): Consider using gather loads if
	supports_vec_gather_load_p.
	* tree-vect-patterns.c (vect_get_load_store_mask): New function.
	(vect_get_gather_scatter_offset_type): Likewise.
	(vect_convert_mask_for_vectype): Likewise.
	(vect_add_conversion_to_patterm): Likewise.
	(vect_try_gather_scatter_pattern): Likewise.
	(vect_recog_gather_scatter_pattern): New pattern recognizer.
	(vect_vect_recog_func_ptrs): Add it.
	* tree-vect-stmts.c (exist_non_indexing_operands_for_use_p): Use
	internal_fn_mask_index and internal_gather_scatter_fn_p.
	(check_load_store_masking): Take the gather_scatter_info as an
	argument and handle gather loads.
	(vect_get_gather_scatter_ops): New function.
	(vectorizable_call): Check internal_load_fn_p.
	(vectorizable_load): Likewise.  Handle gather load internal
	functions.
	(vectorizable_store): Update call to check_load_store_masking.
	* config/aarch64/aarch64.md (UNSPEC_LD1_GATHER): New unspec.
	* config/aarch64/iterators.md (SVE_S, SVE_D): New mode iterators.
	* config/aarch64/predicates.md (aarch64_gather_scale_operand_w)
	(aarch64_gather_scale_operand_d): New predicates.
	* config/aarch64/aarch64-sve.md (gather_load<mode>): New expander.
	(mask_gather_load<mode>): New insns.

gcc/testsuite/
	* gcc.target/aarch64/sve/gather_load_1.c: New test.
	* gcc.target/aarch64/sve/gather_load_2.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_3.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_4.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_5.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_6.c: Likewise.
	* gcc.target/aarch64/sve/gather_load_7.c: Likewise.
	* gcc.target/aarch64/sve/mask_gather_load_1.c: Likewise.
	* gcc.target/aarch64/sve/mask_gather_load_2.c: Likewise.
	* gcc.target/aarch64/sve/mask_gather_load_3.c: Likewise.
	* gcc.target/aarch64/sve/mask_gather_load_4.c: Likewise.
	* gcc.target/aarch64/sve/mask_gather_load_5.c: Likewise.
	* gcc.target/aarch64/sve/mask_gather_load_6.c: Likewise.
	* gcc.target/aarch64/sve/mask_gather_load_7.c: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256640

/* Tree-based target query functions relating to optabs
   Copyright (C) 1987-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "insn-codes.h"
#include "tree.h"
#include "optabs-tree.h"
#include "stor-layout.h"

/* Return the optab used for computing the operation given by the tree
   code CODE on values of type TYPE.  This function is not always usable
   (for example, it cannot give complete results for multiplication or
   division) but probably ought to be relied on more widely throughout
   the expander.  */
optab
optab_for_tree_code (enum tree_code code, const_tree type,
		     enum optab_subtype subtype)
{
  bool trapv;
  switch (code)
    {
    case BIT_AND_EXPR:
      return and_optab;
    case BIT_IOR_EXPR:
      return ior_optab;
    case BIT_NOT_EXPR:
      return one_cmpl_optab;
    case BIT_XOR_EXPR:
      return xor_optab;
    case MULT_HIGHPART_EXPR:
      return TYPE_UNSIGNED (type) ? umul_highpart_optab : smul_highpart_optab;
    case TRUNC_MOD_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
      return TYPE_UNSIGNED (type) ? umod_optab : smod_optab;
    case RDIV_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usdiv_optab : ssdiv_optab;
      return TYPE_UNSIGNED (type) ? udiv_optab : sdiv_optab;
    case LSHIFT_EXPR:
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if (subtype == optab_vector)
	    return TYPE_SATURATING (type) ? unknown_optab : vashl_optab;
	  gcc_assert (subtype == optab_scalar);
	}
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usashl_optab : ssashl_optab;
      return ashl_optab;
    case RSHIFT_EXPR:
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if (subtype == optab_vector)
	    return TYPE_UNSIGNED (type) ? vlshr_optab : vashr_optab;
	  gcc_assert (subtype == optab_scalar);
	}
      return TYPE_UNSIGNED (type) ? lshr_optab : ashr_optab;
    case LROTATE_EXPR:
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if (subtype == optab_vector)
	    return vrotl_optab;
	  gcc_assert (subtype == optab_scalar);
	}
      return rotl_optab;
    case RROTATE_EXPR:
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if (subtype == optab_vector)
	    return vrotr_optab;
	  gcc_assert (subtype == optab_scalar);
	}
      return rotr_optab;
    case MAX_EXPR:
      return TYPE_UNSIGNED (type) ? umax_optab : smax_optab;
    case MIN_EXPR:
      return TYPE_UNSIGNED (type) ? umin_optab : smin_optab;
    case REALIGN_LOAD_EXPR:
      return vec_realign_load_optab;
    case WIDEN_SUM_EXPR:
      return TYPE_UNSIGNED (type) ? usum_widen_optab : ssum_widen_optab;
    case DOT_PROD_EXPR:
      return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab;
    case SAD_EXPR:
      return TYPE_UNSIGNED (type) ? usad_optab : ssad_optab;
    case WIDEN_MULT_PLUS_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? (TYPE_SATURATING (type)
		 ? usmadd_widen_optab : umadd_widen_optab)
	      : (TYPE_SATURATING (type)
		 ? ssmadd_widen_optab : smadd_widen_optab));
    case WIDEN_MULT_MINUS_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? (TYPE_SATURATING (type)
		 ? usmsub_widen_optab : umsub_widen_optab)
	      : (TYPE_SATURATING (type)
		 ? ssmsub_widen_optab : smsub_widen_optab));
    case FMA_EXPR:
      return fma_optab;
    case VEC_WIDEN_MULT_HI_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab);
    case VEC_WIDEN_MULT_LO_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab);
    case VEC_WIDEN_MULT_EVEN_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? vec_widen_umult_even_optab : vec_widen_smult_even_optab);
    case VEC_WIDEN_MULT_ODD_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab);
    case VEC_WIDEN_LSHIFT_HI_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab);
    case VEC_WIDEN_LSHIFT_LO_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab);
    case VEC_UNPACK_HI_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? vec_unpacku_hi_optab : vec_unpacks_hi_optab);
    case VEC_UNPACK_LO_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? vec_unpacku_lo_optab : vec_unpacks_lo_optab);
    case VEC_UNPACK_FLOAT_HI_EXPR:
      /* The signedness is determined from input operand.  */
      return (TYPE_UNSIGNED (type)
	      ? vec_unpacku_float_hi_optab : vec_unpacks_float_hi_optab);
    case VEC_UNPACK_FLOAT_LO_EXPR:
      /* The signedness is determined from input operand.  */
      return (TYPE_UNSIGNED (type)
	      ? vec_unpacku_float_lo_optab : vec_unpacks_float_lo_optab);
    case VEC_PACK_TRUNC_EXPR:
      return vec_pack_trunc_optab;
    case VEC_PACK_SAT_EXPR:
      return TYPE_UNSIGNED (type) ? vec_pack_usat_optab : vec_pack_ssat_optab;
    case VEC_PACK_FIX_TRUNC_EXPR:
      /* The signedness is determined from output operand.  */
      return (TYPE_UNSIGNED (type)
	      ? vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab);
    case VEC_DUPLICATE_EXPR:
      return vec_duplicate_optab;
    case VEC_SERIES_EXPR:
      return vec_series_optab;
    default:
      break;
    }

  trapv = INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type);
  switch (code)
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usadd_optab : ssadd_optab;
      return trapv ? addv_optab : add_optab;
    case POINTER_DIFF_EXPR:
    case MINUS_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? ussub_optab : sssub_optab;
      return trapv ? subv_optab : sub_optab;
    case MULT_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usmul_optab : ssmul_optab;
      return trapv ? smulv_optab : smul_optab;
    case NEGATE_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usneg_optab : ssneg_optab;
      return trapv ? negv_optab : neg_optab;
    case ABS_EXPR:
      return trapv ? absv_optab : abs_optab;
    default:
      return unknown_optab;
    }
}
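
/* Editorial usage sketch, not part of the original file: a typical
   caller pairs optab_for_tree_code with optab_handler to ask whether
   the target implements an operation on a given type (this is exactly
   what target_supports_op_p at the end of this file does).  The helper
   name below is hypothetical.  Note that vector shifts and rotates
   must pass an explicit optab_scalar or optab_vector subtype.  */

static bool ATTRIBUTE_UNUSED
example_supports_tree_code_p (enum tree_code code, tree type)
{
  optab op = optab_for_tree_code (code, type, optab_default);
  return (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing);
}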

/* Function supportable_convert_operation

   Check whether an operation represented by the code CODE is a
   convert operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Convert operations we currently support directly are FIX_TRUNC and FLOAT.
   This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 is the code of the vector operation to be used when
     vectorizing the operation, if available.
   - DECL is the decl of the target builtin function to be used
     when vectorizing the operation, if available.  In this case,
     CODE1 is CALL_EXPR.  */
bool
supportable_convert_operation (enum tree_code code,
			       tree vectype_out, tree vectype_in,
			       tree *decl, enum tree_code *code1)
{
  machine_mode m1, m2;
  bool truncp;

  m1 = TYPE_MODE (vectype_out);
  m2 = TYPE_MODE (vectype_in);

  /* First check if we can do the conversion directly.  */
  if ((code == FIX_TRUNC_EXPR
       && can_fix_p (m1, m2, TYPE_UNSIGNED (vectype_out), &truncp)
	  != CODE_FOR_nothing)
      || (code == FLOAT_EXPR
	  && can_float_p (m1, m2, TYPE_UNSIGNED (vectype_in))
	     != CODE_FOR_nothing))
    {
      *code1 = code;
      return true;
    }

  /* Now check for a builtin.  */
  if (targetm.vectorize.builtin_conversion
      && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
    {
      *code1 = CALL_EXPR;
      *decl = targetm.vectorize.builtin_conversion (code, vectype_out,
						    vectype_in);
      return true;
    }

  return false;
}
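
/* Editorial usage sketch, not part of the original file; the helper
   name is hypothetical.  Deciding how to vectorize an int-to-float
   conversion: on success, code1 is either FLOAT_EXPR (emit the tree
   code directly) or CALL_EXPR (emit a call to decl).  */

static bool ATTRIBUTE_UNUSED
example_int_to_float_supported_p (tree vectype_out, tree vectype_in)
{
  tree decl = NULL_TREE;
  enum tree_code code1 = ERROR_MARK;
  return supportable_convert_operation (FLOAT_EXPR, vectype_out, vectype_in,
					&decl, &code1);
}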

/* Return TRUE if an appropriate vector insn is available
   for a vector comparison expr with vector type VALUE_TYPE
   and resulting mask with MASK_TYPE.  */
bool
expand_vec_cmp_expr_p (tree value_type, tree mask_type, enum tree_code code)
{
  if (get_vec_cmp_icode (TYPE_MODE (value_type), TYPE_MODE (mask_type),
			 TYPE_UNSIGNED (value_type)) != CODE_FOR_nothing)
    return true;

  if ((code == EQ_EXPR || code == NE_EXPR)
      && (get_vec_cmp_eq_icode (TYPE_MODE (value_type), TYPE_MODE (mask_type))
	  != CODE_FOR_nothing))
    return true;

  return false;
}

/* Return TRUE iff appropriate vector insns are available
   for a vector cond expr with vector type VALUE_TYPE and a comparison
   with operand vector types in CMP_OP_TYPE.  */
bool
expand_vec_cond_expr_p (tree value_type, tree cmp_op_type, enum tree_code code)
{
  machine_mode value_mode = TYPE_MODE (value_type);
  machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type);

  if (VECTOR_BOOLEAN_TYPE_P (cmp_op_type)
      && get_vcond_mask_icode (TYPE_MODE (value_type),
			       TYPE_MODE (cmp_op_type)) != CODE_FOR_nothing)
    return true;

  if (maybe_ne (GET_MODE_SIZE (value_mode), GET_MODE_SIZE (cmp_op_mode))
      || maybe_ne (GET_MODE_NUNITS (value_mode),
		   GET_MODE_NUNITS (cmp_op_mode)))
    return false;

  if (get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type),
		       TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing
      && ((code != EQ_EXPR && code != NE_EXPR)
	  || get_vcond_eq_icode (TYPE_MODE (value_type),
				 TYPE_MODE (cmp_op_type)) == CODE_FOR_nothing))
    return false;

  return true;
}
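
/* Editorial usage sketch, not part of the original file; the helper
   name is hypothetical.  The vectorizer asks both questions when a
   comparison feeds a VEC_COND_EXPR: can the comparison itself be
   expanded, and can the select on its mask be expanded?  */

static bool ATTRIBUTE_UNUSED
example_cond_vectorizable_p (tree value_type, tree cmp_op_type,
			     tree mask_type, enum tree_code cmp_code)
{
  return (expand_vec_cmp_expr_p (cmp_op_type, mask_type, cmp_code)
	  && expand_vec_cond_expr_p (value_type, cmp_op_type, cmp_code));
}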

/* Use the current target and options to initialize
   TREE_OPTIMIZATION_OPTABS (OPTNODE).  */
void
init_tree_optimization_optabs (tree optnode)
{
  /* Quick exit if we have already computed optabs for this target.  */
  if (TREE_OPTIMIZATION_BASE_OPTABS (optnode) == this_target_optabs)
    return;

  /* Forget any previous information and set up for the current target.  */
  TREE_OPTIMIZATION_BASE_OPTABS (optnode) = this_target_optabs;
  struct target_optabs *tmp_optabs = (struct target_optabs *)
    TREE_OPTIMIZATION_OPTABS (optnode);
  if (tmp_optabs)
    memset (tmp_optabs, 0, sizeof (struct target_optabs));
  else
    tmp_optabs = ggc_cleared_alloc<target_optabs> ();

  /* Generate a new set of optabs into tmp_optabs.  */
  init_all_optabs (tmp_optabs);

  /* If the optabs changed, record it.  */
  if (memcmp (tmp_optabs, this_target_optabs, sizeof (struct target_optabs)))
    TREE_OPTIMIZATION_OPTABS (optnode) = tmp_optabs;
  else
    {
      TREE_OPTIMIZATION_OPTABS (optnode) = NULL;
      ggc_free (tmp_optabs);
    }
}
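
/* Editorial note, not part of the original file: the caller passes a
   tree_optimization_option node, typically the one attached to a
   function, e.g.

     init_tree_optimization_optabs (DECL_FUNCTION_SPECIFIC_OPTIMIZATION
				      (fndecl));

   Allocating with ggc_cleared_alloc (rather than ggc_alloc) zero-fills
   padding bytes, so the memcmp against this_target_optabs above can
   reliably detect "no change".  */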

/* Return TRUE if the target has support for the operation given by
   CODE on operands of type TYPE.  OT_SUBTYPE selects between the
   scalar-shift and vector-shift forms of an optab where that
   distinction matters; see optab_for_tree_code.  */
bool
target_supports_op_p (tree type, enum tree_code code,
		      enum optab_subtype ot_subtype)
{
  optab ot = optab_for_tree_code (code, type, ot_subtype);
  return (ot != unknown_optab
	  && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
}
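
/* Editorial usage sketch, not part of the original file; the helper
   name is hypothetical.  Checking whether the target can shift a
   vector either by a single scalar amount or elementwise by a vector
   of amounts.  */

static bool ATTRIBUTE_UNUSED
example_supports_vector_rshift_p (tree vectype)
{
  return (target_supports_op_p (vectype, RSHIFT_EXPR, optab_scalar)
	  || target_supports_op_p (vectype, RSHIFT_EXPR, optab_vector));
}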