/* Builtins' description for AArch64 SIMD architecture.
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "gimple-iterator.h"
#include "case-cfn-macros.h"
#include "emit-rtl.h"
#include "stringpool.h"
#include "attribs.h"
#include "gimple-fold.h"

#define v8qi_UP E_V8QImode
#define v8di_UP E_V8DImode
#define v4hi_UP E_V4HImode
#define v4hf_UP E_V4HFmode
#define v2si_UP E_V2SImode
#define v2sf_UP E_V2SFmode
#define v1df_UP E_V1DFmode
#define di_UP E_DImode
#define df_UP E_DFmode
#define v16qi_UP E_V16QImode
#define v8hi_UP E_V8HImode
#define v8hf_UP E_V8HFmode
#define v4si_UP E_V4SImode
#define v4sf_UP E_V4SFmode
#define v2di_UP E_V2DImode
#define v2df_UP E_V2DFmode
#define ti_UP E_TImode
#define oi_UP E_OImode
#define ci_UP E_CImode
#define xi_UP E_XImode
#define si_UP E_SImode
#define sf_UP E_SFmode
#define hi_UP E_HImode
#define hf_UP E_HFmode
#define qi_UP E_QImode
#define bf_UP E_BFmode
#define v4bf_UP E_V4BFmode
#define v8bf_UP E_V8BFmode
#define v2x8qi_UP E_V2x8QImode
#define v2x4hi_UP E_V2x4HImode
#define v2x4hf_UP E_V2x4HFmode
#define v2x4bf_UP E_V2x4BFmode
#define v2x2si_UP E_V2x2SImode
#define v2x2sf_UP E_V2x2SFmode
#define v2x1di_UP E_V2x1DImode
#define v2x1df_UP E_V2x1DFmode
#define v2x16qi_UP E_V2x16QImode
#define v2x8hi_UP E_V2x8HImode
#define v2x8hf_UP E_V2x8HFmode
#define v2x8bf_UP E_V2x8BFmode
#define v2x4si_UP E_V2x4SImode
#define v2x4sf_UP E_V2x4SFmode
#define v2x2di_UP E_V2x2DImode
#define v2x2df_UP E_V2x2DFmode
#define v3x8qi_UP E_V3x8QImode
#define v3x4hi_UP E_V3x4HImode
#define v3x4hf_UP E_V3x4HFmode
#define v3x4bf_UP E_V3x4BFmode
#define v3x2si_UP E_V3x2SImode
#define v3x2sf_UP E_V3x2SFmode
#define v3x1di_UP E_V3x1DImode
#define v3x1df_UP E_V3x1DFmode
#define v3x16qi_UP E_V3x16QImode
#define v3x8hi_UP E_V3x8HImode
#define v3x8hf_UP E_V3x8HFmode
#define v3x8bf_UP E_V3x8BFmode
#define v3x4si_UP E_V3x4SImode
#define v3x4sf_UP E_V3x4SFmode
#define v3x2di_UP E_V3x2DImode
#define v3x2df_UP E_V3x2DFmode
#define v4x8qi_UP E_V4x8QImode
#define v4x4hi_UP E_V4x4HImode
#define v4x4hf_UP E_V4x4HFmode
#define v4x4bf_UP E_V4x4BFmode
#define v4x2si_UP E_V4x2SImode
#define v4x2sf_UP E_V4x2SFmode
#define v4x1di_UP E_V4x1DImode
#define v4x1df_UP E_V4x1DFmode
#define v4x16qi_UP E_V4x16QImode
#define v4x8hi_UP E_V4x8HImode
#define v4x8hf_UP E_V4x8HFmode
#define v4x8bf_UP E_V4x8BFmode
#define v4x4si_UP E_V4x4SImode
#define v4x4sf_UP E_V4x4SFmode
#define v4x2di_UP E_V4x2DImode
#define v4x2df_UP E_V4x2DFmode
#define UP(X) X##_UP

#define SIMD_MAX_BUILTIN_ARGS 5

enum aarch64_type_qualifiers
{
  /* T foo.  */
  qualifier_none = 0x0,
  /* unsigned T foo.  */
  qualifier_unsigned = 0x1, /* 1 << 0  */
  /* const T foo.  */
  qualifier_const = 0x2, /* 1 << 1  */
  /* T *foo.  */
  qualifier_pointer = 0x4, /* 1 << 2  */
  /* Used when expanding arguments if an operand could
     be an immediate.  */
  qualifier_immediate = 0x8, /* 1 << 3  */
  qualifier_maybe_immediate = 0x10, /* 1 << 4  */
  /* void foo (...).  */
  qualifier_void = 0x20, /* 1 << 5  */
  /* Some patterns may have internal operands, this qualifier is an
     instruction to the initialisation code to skip this operand.  */
  qualifier_internal = 0x40, /* 1 << 6  */
  /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
     rather than using the type of the operand.  */
  qualifier_map_mode = 0x80, /* 1 << 7  */
  /* qualifier_pointer | qualifier_map_mode  */
  qualifier_pointer_map_mode = 0x84,
  /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
  qualifier_const_pointer_map_mode = 0x86,
  /* Polynomial types.  */
  qualifier_poly = 0x100,
  /* Lane indices - must be in range, and flipped for bigendian.  */
  qualifier_lane_index = 0x200,
  /* Lane indices for single lane structure loads and stores.  */
  qualifier_struct_load_store_lane_index = 0x400,
  /* Lane indices selected in pairs - must be in range, and flipped for
     bigendian.  */
  qualifier_lane_pair_index = 0x800,
  /* Lane indices selected in quadtuplets - must be in range, and flipped for
     bigendian.  */
  qualifier_lane_quadtup_index = 0x1000,
};

/* Flags that describe what a function might do.  */
const unsigned int FLAG_NONE = 0U;
const unsigned int FLAG_READ_FPCR = 1U << 0;
const unsigned int FLAG_RAISE_FP_EXCEPTIONS = 1U << 1;
const unsigned int FLAG_READ_MEMORY = 1U << 2;
const unsigned int FLAG_PREFETCH_MEMORY = 1U << 3;
const unsigned int FLAG_WRITE_MEMORY = 1U << 4;

/* Not all FP intrinsics raise FP exceptions or read the FPCR register;
   use this flag to suppress that behaviour.  */
const unsigned int FLAG_AUTO_FP = 1U << 5;

const unsigned int FLAG_FP = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS;
const unsigned int FLAG_ALL = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS
  | FLAG_READ_MEMORY | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY;
const unsigned int FLAG_STORE = FLAG_WRITE_MEMORY | FLAG_AUTO_FP;
const unsigned int FLAG_LOAD = FLAG_READ_MEMORY | FLAG_AUTO_FP;

typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code code;
  unsigned int fcode;
  enum aarch64_type_qualifiers *qualifiers;
  unsigned int flags;
} aarch64_simd_builtin_datum;

static enum aarch64_type_qualifiers
aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none };
#define TYPES_UNOP (aarch64_types_unop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned };
#define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none };
#define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_unsigned };
#define TYPES_BINOP_PPU (aarch64_types_binop_ppu_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
#define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_TERNOPU_LANE (aarch64_types_ternopu_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_sssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_TERNOP_SSSU (aarch64_types_ternop_sssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_pppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_unsigned };
#define TYPES_TERNOP_PPPU (aarch64_types_binop_pppu_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_pair_index };
#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_index };
#define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSUS_LANE_QUADTUP \
  (aarch64_types_quadopssus_lane_quadtup_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_unsigned, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSSU_LANE_QUADTUP \
  (aarch64_types_quadopsssu_lane_quadtup_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
#define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_immediate };
#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_immediate };
#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none,
      qualifier_immediate };
#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate };
#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_immediate };
#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_load1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1_U (aarch64_types_load1_u_qualifiers)
#define TYPES_LOADSTRUCT_U (aarch64_types_load1_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_load1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1_P (aarch64_types_load1_p_qualifiers)
#define TYPES_LOADSTRUCT_P (aarch64_types_load1_p_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_const_pointer_map_mode,
      qualifier_unsigned, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE_U (aarch64_types_loadstruct_lane_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_const_pointer_map_mode,
      qualifier_poly, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE_P (aarch64_types_loadstruct_lane_p_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_unsigned,
      qualifier_poly, qualifier_poly };
#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_none };
#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)

/* The first argument (return type) of a store should be void type,
   which we represent with qualifier_void.  Their first operand will be
   a DImode pointer to the location to store to, so we must use
   qualifier_map_mode | qualifier_pointer to build a pointer to the
   element type of the vector.  */
static enum aarch64_type_qualifiers
aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_unsigned };
#define TYPES_STORE1_U (aarch64_types_store1_u_qualifiers)
#define TYPES_STORESTRUCT_U (aarch64_types_store1_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
#define TYPES_STORE1_P (aarch64_types_store1_p_qualifiers)
#define TYPES_STORESTRUCT_P (aarch64_types_store1_p_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_unsigned, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE_U (aarch64_types_storestruct_lane_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_poly, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE_P (aarch64_types_storestruct_lane_p_qualifiers)

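/* CF<MAP> maps a builtin name N and a mode suffix X to an insn code:
   CF0 looks up the "aarch64_"-prefixed pattern, CF1..CF4 append the given
   digit to the pattern name, and CF10 uses the name and suffix unchanged.
   The VARn macros below expand to n initialisers for
   aarch64_simd_builtin_data, one for each of the listed modes.  */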
#define CF0(N, X) CODE_FOR_aarch64_##N##X
#define CF1(N, X) CODE_FOR_##N##X##1
#define CF2(N, X) CODE_FOR_##N##X##2
#define CF3(N, X) CODE_FOR_##N##X##3
#define CF4(N, X) CODE_FOR_##N##X##4
#define CF10(N, X) CODE_FOR_##N##X

#define VAR1(T, N, MAP, FLAG, A) \
  {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T, FLAG_##FLAG},
#define VAR2(T, N, MAP, FLAG, A, B) \
  VAR1 (T, N, MAP, FLAG, A) \
  VAR1 (T, N, MAP, FLAG, B)
#define VAR3(T, N, MAP, FLAG, A, B, C) \
  VAR2 (T, N, MAP, FLAG, A, B) \
  VAR1 (T, N, MAP, FLAG, C)
#define VAR4(T, N, MAP, FLAG, A, B, C, D) \
  VAR3 (T, N, MAP, FLAG, A, B, C) \
  VAR1 (T, N, MAP, FLAG, D)
#define VAR5(T, N, MAP, FLAG, A, B, C, D, E) \
  VAR4 (T, N, MAP, FLAG, A, B, C, D) \
  VAR1 (T, N, MAP, FLAG, E)
#define VAR6(T, N, MAP, FLAG, A, B, C, D, E, F) \
  VAR5 (T, N, MAP, FLAG, A, B, C, D, E) \
  VAR1 (T, N, MAP, FLAG, F)
#define VAR7(T, N, MAP, FLAG, A, B, C, D, E, F, G) \
  VAR6 (T, N, MAP, FLAG, A, B, C, D, E, F) \
  VAR1 (T, N, MAP, FLAG, G)
#define VAR8(T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, MAP, FLAG, A, B, C, D, E, F, G) \
  VAR1 (T, N, MAP, FLAG, H)
#define VAR9(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
  VAR1 (T, N, MAP, FLAG, I)
#define VAR10(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
  VAR1 (T, N, MAP, FLAG, J)
#define VAR11(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
  VAR10 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
  VAR1 (T, N, MAP, FLAG, K)
#define VAR12(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR11 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
  VAR1 (T, N, MAP, FLAG, L)
#define VAR13(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR12 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR1 (T, N, MAP, FLAG, M)
#define VAR14(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR13 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR1 (T, X, MAP, FLAG, N)
#define VAR15(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
  VAR14 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR1 (T, X, MAP, FLAG, O)
#define VAR16(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
  VAR15 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
  VAR1 (T, X, MAP, FLAG, P)

#include "aarch64-builtin-iterators.h"
|
|
|
|
static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
|
|
#include "aarch64-simd-builtins.def"
|
|
};
|
|
|
|
/* There's only 8 CRC32 builtins. Probably not worth their own .def file. */
|
|
#define AARCH64_CRC32_BUILTINS \
|
|
CRC32_BUILTIN (crc32b, QI) \
|
|
CRC32_BUILTIN (crc32h, HI) \
|
|
CRC32_BUILTIN (crc32w, SI) \
|
|
CRC32_BUILTIN (crc32x, DI) \
|
|
CRC32_BUILTIN (crc32cb, QI) \
|
|
CRC32_BUILTIN (crc32ch, HI) \
|
|
CRC32_BUILTIN (crc32cw, SI) \
|
|
CRC32_BUILTIN (crc32cx, DI)
|
|
|
|
/* The next 8 FCMLA instrinsics require some special handling compared the
|
|
normal simd intrinsics. */
|
|
#define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
|
|
FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
|
|
FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
|
|
FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
|
|
FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
|
|
FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
|
|
FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
|
|
FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
|
|
FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \
|
|
|
|
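/* Hold information about how to expand the CRC32 builtins.  */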
typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
} aarch64_crc_builtin_datum;

/* Hold information about how to expand the FCMLA_LANEQ builtins.  */
typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
  bool lane;
} aarch64_fcmla_laneq_builtin_datum;

#define CRC32_BUILTIN(N, M) \
  AARCH64_BUILTIN_##N,

#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,

#undef VAR1
#define VAR1(T, N, MAP, FLAG, A) \
  AARCH64_SIMD_BUILTIN_##T##_##N##A,

enum aarch64_builtins
{
  AARCH64_BUILTIN_MIN,

  AARCH64_BUILTIN_GET_FPCR,
  AARCH64_BUILTIN_SET_FPCR,
  AARCH64_BUILTIN_GET_FPSR,
  AARCH64_BUILTIN_SET_FPSR,

  AARCH64_BUILTIN_GET_FPCR64,
  AARCH64_BUILTIN_SET_FPCR64,
  AARCH64_BUILTIN_GET_FPSR64,
  AARCH64_BUILTIN_SET_FPSR64,

  AARCH64_BUILTIN_RSQRT_DF,
  AARCH64_BUILTIN_RSQRT_SF,
  AARCH64_BUILTIN_RSQRT_V2DF,
  AARCH64_BUILTIN_RSQRT_V2SF,
  AARCH64_BUILTIN_RSQRT_V4SF,
  AARCH64_SIMD_BUILTIN_BASE,
  AARCH64_SIMD_BUILTIN_LANE_CHECK,
#include "aarch64-simd-builtins.def"
  /* The first enum element which is based on an insn_data pattern.  */
  AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
  AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
    + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
  AARCH64_CRC32_BUILTIN_BASE,
  AARCH64_CRC32_BUILTINS
  AARCH64_CRC32_BUILTIN_MAX,
  /* ARMv8.3-A Pointer Authentication Builtins.  */
  AARCH64_PAUTH_BUILTIN_AUTIA1716,
  AARCH64_PAUTH_BUILTIN_PACIA1716,
  AARCH64_PAUTH_BUILTIN_AUTIB1716,
  AARCH64_PAUTH_BUILTIN_PACIB1716,
  AARCH64_PAUTH_BUILTIN_XPACLRI,
  /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins.  */
  AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
  /* Builtin for the Armv8.3-A Javascript conversion instruction.  */
  AARCH64_JSCVT,
  /* TME builtins.  */
  AARCH64_TME_BUILTIN_TSTART,
  AARCH64_TME_BUILTIN_TCOMMIT,
  AARCH64_TME_BUILTIN_TTEST,
  AARCH64_TME_BUILTIN_TCANCEL,
  /* Armv8.5-A RNG instruction builtins.  */
  AARCH64_BUILTIN_RNG_RNDR,
  AARCH64_BUILTIN_RNG_RNDRRS,
  /* MEMTAG builtins.  */
  AARCH64_MEMTAG_BUILTIN_START,
  AARCH64_MEMTAG_BUILTIN_IRG,
  AARCH64_MEMTAG_BUILTIN_GMI,
  AARCH64_MEMTAG_BUILTIN_SUBP,
  AARCH64_MEMTAG_BUILTIN_INC_TAG,
  AARCH64_MEMTAG_BUILTIN_SET_TAG,
  AARCH64_MEMTAG_BUILTIN_GET_TAG,
  AARCH64_MEMTAG_BUILTIN_END,
  /* LS64 builtins.  */
  AARCH64_LS64_BUILTIN_LD64B,
  AARCH64_LS64_BUILTIN_ST64B,
  AARCH64_LS64_BUILTIN_ST64BV,
  AARCH64_LS64_BUILTIN_ST64BV0,
  AARCH64_BUILTIN_MAX
};

#undef CRC32_BUILTIN
#define CRC32_BUILTIN(N, M) \
  {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},

static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
  AARCH64_CRC32_BUILTINS
};

#undef FCMLA_LANEQ_BUILTIN
#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
   AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},

/* This structure describes how to map the builtin to the instruction to
   generate in the backend and how to invoke the instruction.  */
static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
};

#undef CRC32_BUILTIN

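/* The builtin function decls created at initialisation time, indexed by
   function code.  */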
static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];

#define NUM_DREG_TYPES 6
#define NUM_QREG_TYPES 6

/* Internal scalar builtin types.  These types are used to support
   neon intrinsic builtins.  They are _not_ user-visible types.  Therefore
   the mangling for these types is implementation defined.  */
const char *aarch64_scalar_builtin_types[] = {
  "__builtin_aarch64_simd_qi",
  "__builtin_aarch64_simd_hi",
  "__builtin_aarch64_simd_si",
  "__builtin_aarch64_simd_hf",
  "__builtin_aarch64_simd_sf",
  "__builtin_aarch64_simd_di",
  "__builtin_aarch64_simd_df",
  "__builtin_aarch64_simd_poly8",
  "__builtin_aarch64_simd_poly16",
  "__builtin_aarch64_simd_poly64",
  "__builtin_aarch64_simd_poly128",
  "__builtin_aarch64_simd_ti",
  "__builtin_aarch64_simd_uqi",
  "__builtin_aarch64_simd_uhi",
  "__builtin_aarch64_simd_usi",
  "__builtin_aarch64_simd_udi",
  "__builtin_aarch64_simd_ei",
  "__builtin_aarch64_simd_oi",
  "__builtin_aarch64_simd_ci",
  "__builtin_aarch64_simd_xi",
  "__builtin_aarch64_simd_bf",
  NULL
};

#define ENTRY(E, M, Q, G) E,
enum aarch64_simd_type
{
#include "aarch64-simd-builtin-types.def"
  ARM_NEON_H_TYPES_LAST
};
#undef ENTRY

struct GTY(()) aarch64_simd_type_info
{
  enum aarch64_simd_type type;

  /* Internal type name.  */
  const char *name;

  /* Internal type name (mangled).  The mangled names conform to the
     AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
     Appendix A).  To qualify for emission with the mangled names defined in
     that document, a vector type must not only be of the correct mode but also
     be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
     types are registered by aarch64_init_simd_builtin_types ().  In other
     words, vector types defined in other ways e.g. via vector_size attribute
     will get default mangled names.  */
  const char *mangle;

  /* Internal type.  */
  tree itype;

  /* Element type.  */
  tree eltype;

  /* Machine mode the internal type maps to.  */
  enum machine_mode mode;

  /* Qualifiers.  */
  enum aarch64_type_qualifiers q;
};

#define ENTRY(E, M, Q, G) \
  {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
static GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = {
#include "aarch64-simd-builtin-types.def"
};
#undef ENTRY

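/* The modes and types of the 2-, 3- and 4-vector tuple variants of each
   AdvSIMD type, indexed by base type and by NUM_VECTORS - 2.  */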
static machine_mode aarch64_simd_tuple_modes[ARM_NEON_H_TYPES_LAST][3];
static GTY(()) tree aarch64_simd_tuple_types[ARM_NEON_H_TYPES_LAST][3];

static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;

/* The user-visible __fp16 type, and a pointer to that type.  Used
   across the back-end.  */
tree aarch64_fp16_type_node = NULL_TREE;
tree aarch64_fp16_ptr_type_node = NULL_TREE;

/* Back-end node type for brain float (bfloat) types.  */
tree aarch64_bf16_type_node = NULL_TREE;
tree aarch64_bf16_ptr_type_node = NULL_TREE;

/* Wrapper around add_builtin_function.  NAME is the name of the built-in
   function, TYPE is the function type, CODE is the function subcode
   (relative to AARCH64_BUILTIN_GENERAL), and ATTRS is the function
   attributes.  */
static tree
aarch64_general_add_builtin (const char *name, tree type, unsigned int code,
                             tree attrs = NULL_TREE)
{
  code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
  return add_builtin_function (name, type, code, BUILT_IN_MD,
                               NULL, attrs);
}

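/* If TYPE is a registered internal scalar builtin type, return its name
   (which doubles as its mangled name); otherwise return NULL.  */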
static const char *
aarch64_mangle_builtin_scalar_type (const_tree type)
{
  int i = 0;

  while (aarch64_scalar_builtin_types[i] != NULL)
    {
      const char *name = aarch64_scalar_builtin_types[i];

      if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
          && DECL_NAME (TYPE_NAME (type))
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
        return aarch64_scalar_builtin_types[i];
      i++;
    }
  return NULL;
}

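/* Return the mangled name recorded in TYPE's "Advanced SIMD type"
   attribute, or NULL if TYPE is not an AdvSIMD vector type.  */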
static const char *
aarch64_mangle_builtin_vector_type (const_tree type)
{
  tree attrs = TYPE_ATTRIBUTES (type);
  if (tree attr = lookup_attribute ("Advanced SIMD type", attrs))
    {
      tree mangled_name = TREE_VALUE (TREE_VALUE (attr));
      return IDENTIFIER_POINTER (mangled_name);
    }

  return NULL;
}

const char *
aarch64_general_mangle_builtin_type (const_tree type)
{
  const char *mangle;
  /* Walk through all the AArch64 builtins types tables to filter out the
     incoming type.  */
  if ((mangle = aarch64_mangle_builtin_vector_type (type))
      || (mangle = aarch64_mangle_builtin_scalar_type (type)))
    return mangle;

  return NULL;
}

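/* Return the type node that corresponds to scalar mode MODE, taking the
   signedness from qualifiers Q where it matters.  */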
static tree
aarch64_simd_builtin_std_type (machine_mode mode,
                               enum aarch64_type_qualifiers q)
{
#define QUAL_TYPE(M) \
  ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node);
  switch (mode)
    {
    case E_QImode:
      return QUAL_TYPE (QI);
    case E_HImode:
      return QUAL_TYPE (HI);
    case E_SImode:
      return QUAL_TYPE (SI);
    case E_DImode:
      return QUAL_TYPE (DI);
    case E_TImode:
      return QUAL_TYPE (TI);
    case E_OImode:
      return aarch64_simd_intOI_type_node;
    case E_CImode:
      return aarch64_simd_intCI_type_node;
    case E_XImode:
      return aarch64_simd_intXI_type_node;
    case E_HFmode:
      return aarch64_fp16_type_node;
    case E_SFmode:
      return float_type_node;
    case E_DFmode:
      return double_type_node;
    case E_BFmode:
      return aarch64_bf16_type_node;
    default:
      gcc_unreachable ();
    }
#undef QUAL_TYPE
}

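/* Look up the AdvSIMD vector type (or vector tuple type) with mode MODE
   and qualifiers Q, falling back to the standard types for non-poly
   scalar modes.  Return NULL_TREE if there is no such type.  */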
static tree
aarch64_lookup_simd_builtin_type (machine_mode mode,
                                  enum aarch64_type_qualifiers q)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  /* Non-poly scalar modes map to standard types not in the table.  */
  if (q != qualifier_poly && !VECTOR_MODE_P (mode))
    return aarch64_simd_builtin_std_type (mode, q);

  for (i = 0; i < nelts; i++)
    {
      if (aarch64_simd_types[i].mode == mode
          && aarch64_simd_types[i].q == q)
        return aarch64_simd_types[i].itype;
      if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
        for (int j = 0; j < 3; j++)
          if (aarch64_simd_tuple_modes[i][j] == mode
              && aarch64_simd_types[i].q == q)
            return aarch64_simd_tuple_types[i][j];
    }

  return NULL_TREE;
}

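/* As aarch64_lookup_simd_builtin_type, but taking the signedness and
   polyness of the required type as booleans.  */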
static tree
aarch64_simd_builtin_type (machine_mode mode,
                           bool unsigned_p, bool poly_p)
{
  if (poly_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_poly);
  else if (unsigned_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_unsigned);
  else
    return aarch64_lookup_simd_builtin_type (mode, qualifier_none);
}

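/* Create and register the internal AdvSIMD vector types, plus the OI/CI/XI
   helper types, filling in the element types and mangled names recorded in
   aarch64_simd_types.  */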
static void
aarch64_init_simd_builtin_types (void)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
  tree tdecl;

  /* Init all the element types built by the front-end.  */
  aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
  aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
  aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
  aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
  aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
  aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
  aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
  aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
  aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
  aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;

  /* Poly types are a world of their own.  */
  aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype =
    build_distinct_type_copy (unsigned_intQI_type_node);
  /* Prevent front-ends from transforming Poly8_t arrays into string
     literals.  */
  TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false;

  aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype =
    build_distinct_type_copy (unsigned_intHI_type_node);
  aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype =
    build_distinct_type_copy (unsigned_intDI_type_node);
  aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype =
    build_distinct_type_copy (unsigned_intTI_type_node);
  /* Init poly vector element types with scalar poly types.  */
  aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
  aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;

  /* Continue with standard types.  */
  aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float32x2_t].eltype = float_type_node;
  aarch64_simd_types[Float32x4_t].eltype = float_type_node;
  aarch64_simd_types[Float64x1_t].eltype = double_type_node;
  aarch64_simd_types[Float64x2_t].eltype = double_type_node;

  /* Init Bfloat vector types with underlying __bf16 type.  */
  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;

  for (i = 0; i < nelts; i++)
    {
      tree eltype = aarch64_simd_types[i].eltype;
      machine_mode mode = aarch64_simd_types[i].mode;

      if (aarch64_simd_types[i].itype == NULL)
        {
          tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
          type = build_distinct_type_copy (type);
          SET_TYPE_STRUCTURAL_EQUALITY (type);

          tree mangled_name = get_identifier (aarch64_simd_types[i].mangle);
          tree value = tree_cons (NULL_TREE, mangled_name, NULL_TREE);
          TYPE_ATTRIBUTES (type)
            = tree_cons (get_identifier ("Advanced SIMD type"), value,
                         TYPE_ATTRIBUTES (type));
          aarch64_simd_types[i].itype = type;
        }

      tdecl = add_builtin_type (aarch64_simd_types[i].name,
                                aarch64_simd_types[i].itype);
      TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
    }

#define AARCH64_BUILD_SIGNED_TYPE(mode) \
  make_signed_type (GET_MODE_PRECISION (mode));
  aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
  aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
  aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
#undef AARCH64_BUILD_SIGNED_TYPE

  tdecl = add_builtin_type
    ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
  TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
  tdecl = add_builtin_type
    ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
  TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
  tdecl = add_builtin_type
    ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
  TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
}

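/* Register the __builtin_aarch64_simd_* typedefs for the standard scalar
   types; see the comment in aarch64_init_simd_builtins for why these are
   still needed.  */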
static void
aarch64_init_simd_builtin_scalar_types (void)
{
  /* Define typedefs for all the standard scalar types.  */
  (*lang_hooks.types.register_builtin_type) (intQI_type_node,
                                             "__builtin_aarch64_simd_qi");
  (*lang_hooks.types.register_builtin_type) (intHI_type_node,
                                             "__builtin_aarch64_simd_hi");
  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
                                             "__builtin_aarch64_simd_hf");
  (*lang_hooks.types.register_builtin_type) (intSI_type_node,
                                             "__builtin_aarch64_simd_si");
  (*lang_hooks.types.register_builtin_type) (float_type_node,
                                             "__builtin_aarch64_simd_sf");
  (*lang_hooks.types.register_builtin_type) (intDI_type_node,
                                             "__builtin_aarch64_simd_di");
  (*lang_hooks.types.register_builtin_type) (double_type_node,
                                             "__builtin_aarch64_simd_df");
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
                                             "__builtin_aarch64_simd_poly8");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
                                             "__builtin_aarch64_simd_poly16");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
                                             "__builtin_aarch64_simd_poly64");
  (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
                                             "__builtin_aarch64_simd_poly128");
  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
                                             "__builtin_aarch64_simd_ti");
  (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node,
                                             "__builtin_aarch64_simd_bf");
  /* Unsigned integer types for various mode sizes.  */
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
                                             "__builtin_aarch64_simd_uqi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
                                             "__builtin_aarch64_simd_uhi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
                                             "__builtin_aarch64_simd_usi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
                                             "__builtin_aarch64_simd_udi");
}

/* Return a set of FLAG_* flags derived from FLAGS
   that describe what a function with result MODE could do,
   taking the command-line flags into account.  */
static unsigned int
aarch64_call_properties (unsigned int flags, machine_mode mode)
{
  if (!(flags & FLAG_AUTO_FP) && FLOAT_MODE_P (mode))
    flags |= FLAG_FP;

  /* -fno-trapping-math means that we can assume any FP exceptions
     are not user-visible.  */
  if (!flag_trapping_math)
    flags &= ~FLAG_RAISE_FP_EXCEPTIONS;

  return flags;
}

/* Return true if calls to a function with flags F and mode MODE
   could modify some form of global state.  */
static bool
aarch64_modifies_global_state_p (unsigned int f, machine_mode mode)
{
  unsigned int flags = aarch64_call_properties (f, mode);

  if (flags & FLAG_RAISE_FP_EXCEPTIONS)
    return true;

  if (flags & FLAG_PREFETCH_MEMORY)
    return true;

  return flags & FLAG_WRITE_MEMORY;
}

/* Return true if calls to a function with flags F and mode MODE
   could read some form of global state.  */
static bool
aarch64_reads_global_state_p (unsigned int f, machine_mode mode)
{
  unsigned int flags = aarch64_call_properties (f, mode);

  if (flags & FLAG_READ_FPCR)
    return true;

  return flags & FLAG_READ_MEMORY;
}

/* Return true if calls to a function with flags F and mode MODE
   could raise a signal.  */
static bool
aarch64_could_trap_p (unsigned int f, machine_mode mode)
{
  unsigned int flags = aarch64_call_properties (f, mode);

  if (flags & FLAG_RAISE_FP_EXCEPTIONS)
    return true;

  if (flags & (FLAG_READ_MEMORY | FLAG_WRITE_MEMORY))
    return true;

  return false;
}

/* Add attribute NAME to ATTRS.  */
static tree
aarch64_add_attribute (const char *name, tree attrs)
{
  return tree_cons (get_identifier (name), NULL_TREE, attrs);
}

/* Return the appropriate attributes for a function that has
   flags F and mode MODE.  */
static tree
aarch64_get_attributes (unsigned int f, machine_mode mode)
{
  tree attrs = NULL_TREE;

  if (!aarch64_modifies_global_state_p (f, mode))
    {
      if (aarch64_reads_global_state_p (f, mode))
        attrs = aarch64_add_attribute ("pure", attrs);
      else
        attrs = aarch64_add_attribute ("const", attrs);
    }

  if (!flag_non_call_exceptions || !aarch64_could_trap_p (f, mode))
    attrs = aarch64_add_attribute ("nothrow", attrs);

  return aarch64_add_attribute ("leaf", attrs);
}

static bool aarch64_simd_builtins_initialized_p = false;

/* Because the architecture does not provide a lane variant of the fcmla
   instructions, we can't use the standard SIMD builtin expansion code for
   them, but we still want the majority of the validation that would
   normally be done.  */

void
aarch64_init_fcmla_laneq_builtins (void)
{
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
    {
      aarch64_fcmla_laneq_builtin_datum* d
        = &aarch64_fcmla_lane_builtin_data[i];
      tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none);
      machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
      tree quadtype
        = aarch64_lookup_simd_builtin_type (quadmode, qualifier_none);
      tree lanetype
        = aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index);
      tree ftype = build_function_type_list (argtype, argtype, argtype,
                                             quadtype, lanetype, NULL_TREE);
      tree attrs = aarch64_get_attributes (FLAG_FP, d->mode);
      tree fndecl
        = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}

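/* Register the builtin functions described by aarch64_simd_builtin_data.
   If CALLED_FROM_PRAGMA, register only the builtins whose signatures
   involve vector tuple (structure) modes; otherwise register the rest,
   plus the lane-check builtin.  */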
void
aarch64_init_simd_builtin_functions (bool called_from_pragma)
{
  unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;

  if (!called_from_pragma)
    {
      tree lane_check_fpr = build_function_type_list (void_type_node,
                                                      size_type_node,
                                                      size_type_node,
                                                      intSI_type_node,
                                                      NULL);
      aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
        = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
                                       lane_check_fpr,
                                       AARCH64_SIMD_BUILTIN_LANE_CHECK);
    }

  for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
    {
      bool print_type_signature_p = false;
      char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
      aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
      char namebuf[60];
      tree ftype = NULL;
      tree fndecl = NULL;

      d->fcode = fcode;

      /* We must track two variables here.  op_num is
         the operand number as in the RTL pattern.  This is
         required to access the mode (e.g. V4SF mode) of the
         argument, from which the base type can be derived.
         arg_num is an index in to the qualifiers data, which
         gives qualifiers to the type (e.g. const unsigned).
         The reason these two variables may differ by one is the
         void return type.  While all return types take the 0th entry
         in the qualifiers array, there is no operand for them in the
         RTL pattern.  */
      int op_num = insn_data[d->code].n_operands - 1;
      int arg_num = d->qualifiers[0] & qualifier_void
                      ? op_num + 1
                      : op_num;
      tree return_type = void_type_node, args = void_list_node;
      tree eltype;

      int struct_mode_args = 0;
      for (int j = op_num; j >= 0; j--)
        {
          machine_mode op_mode = insn_data[d->code].operand[j].mode;
          if (aarch64_advsimd_struct_mode_p (op_mode))
            struct_mode_args++;
        }

      if ((called_from_pragma && struct_mode_args == 0)
          || (!called_from_pragma && struct_mode_args > 0))
        continue;

      /* Build a function type directly from the insn_data for this
         builtin.  The build_function_type () function takes care of
         removing duplicates for us.  */
      for (; op_num >= 0; arg_num--, op_num--)
        {
          machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
          enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];

          if (qualifiers & qualifier_unsigned)
            {
              type_signature[op_num] = 'u';
              print_type_signature_p = true;
            }
          else if (qualifiers & qualifier_poly)
            {
              type_signature[op_num] = 'p';
              print_type_signature_p = true;
            }
          else
            type_signature[op_num] = 's';

          /* Skip an internal operand for vget_{low, high}.  */
          if (qualifiers & qualifier_internal)
            continue;

          /* Some builtins have different user-facing types
             for certain arguments, encoded in d->mode.  */
          if (qualifiers & qualifier_map_mode)
            op_mode = d->mode;

          /* For pointers, we want a pointer to the basic type
             of the vector.  */
          if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
            op_mode = GET_MODE_INNER (op_mode);

          eltype = aarch64_simd_builtin_type
                     (op_mode,
                      (qualifiers & qualifier_unsigned) != 0,
                      (qualifiers & qualifier_poly) != 0);
          gcc_assert (eltype != NULL);

          /* Add qualifiers.  */
          if (qualifiers & qualifier_const)
            eltype = build_qualified_type (eltype, TYPE_QUAL_CONST);

          if (qualifiers & qualifier_pointer)
            eltype = build_pointer_type (eltype);

          /* If we have reached arg_num == 0, we are at a non-void
             return type.  Otherwise, we are still processing
             arguments.  */
          if (arg_num == 0)
            return_type = eltype;
          else
            args = tree_cons (NULL_TREE, eltype, args);
        }

      ftype = build_function_type (return_type, args);

      gcc_assert (ftype != NULL);

      if (print_type_signature_p)
        snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
                  d->name, type_signature);
      else
        snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
                  d->name);

      tree attrs = aarch64_get_attributes (d->flags, d->mode);

      if (called_from_pragma)
        {
          unsigned int raw_code
            = (fcode << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
          fndecl = simulate_builtin_function_decl (input_location, namebuf,
                                                   ftype, raw_code, NULL,
                                                   attrs);
        }
      else
        fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);

      aarch64_builtin_decls[fcode] = fndecl;
    }
}

/* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
   indexed by TYPE_INDEX.  */
static void
register_tuple_type (unsigned int num_vectors, unsigned int type_index)
{
  aarch64_simd_type_info *type = &aarch64_simd_types[type_index];

  /* Synthesize the name of the user-visible vector tuple type.  */
  const char *vector_type_name = type->name;
  char tuple_type_name[sizeof ("bfloat16x4x2_t")];
  snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
            (int) strlen (vector_type_name) - 4, vector_type_name + 2,
            num_vectors);
  tuple_type_name[0] = TOLOWER (tuple_type_name[0]);

  tree vector_type = type->itype;
  tree array_type = build_array_type_nelts (vector_type, num_vectors);
  if (type->mode == DImode)
    {
      if (num_vectors == 2)
        SET_TYPE_MODE (array_type, V2x1DImode);
      else if (num_vectors == 3)
        SET_TYPE_MODE (array_type, V3x1DImode);
      else if (num_vectors == 4)
        SET_TYPE_MODE (array_type, V4x1DImode);
    }

  unsigned int alignment
    = known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64;
  machine_mode tuple_mode = TYPE_MODE_RAW (array_type);
  gcc_assert (VECTOR_MODE_P (tuple_mode)
              && TYPE_MODE (array_type) == tuple_mode
              && TYPE_ALIGN (array_type) == alignment);

  tree field = build_decl (input_location, FIELD_DECL,
                           get_identifier ("val"), array_type);

  tree t = lang_hooks.types.simulate_record_decl (input_location,
                                                  tuple_type_name,
                                                  make_array_slice (&field,
                                                                    1));
  gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
              && (flag_pack_struct
                  || maximum_field_alignment
                  || (TYPE_MODE_RAW (t) == tuple_mode
                      && TYPE_ALIGN (t) == alignment)));

  aarch64_simd_tuple_modes[type_index][num_vectors - 2] = tuple_mode;
  aarch64_simd_tuple_types[type_index][num_vectors - 2] = t;
}

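/* Return true if the AdvSIMD type table entry T describes a scalar type
   rather than a vector type.  */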
static bool
aarch64_scalar_builtin_type_p (aarch64_simd_type t)
{
  return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
}

/* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
   set.  */
aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags)
  : m_old_isa_flags (aarch64_isa_flags),
    m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
{
  /* Changing the ISA flags should be enough here.  We shouldn't need to
     pay the compile-time cost of a full target switch.  */
  aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags;
  global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
}

aarch64_simd_switcher::~aarch64_simd_switcher ()
{
  if (m_old_general_regs_only)
    global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
  aarch64_isa_flags = m_old_isa_flags;
}

/* Implement #pragma GCC aarch64 "arm_neon.h".  */
void
handle_arm_neon_h (void)
{
  aarch64_simd_switcher simd;

  /* Register the AdvSIMD vector tuple types.  */
  for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
    for (unsigned int count = 2; count <= 4; ++count)
      if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type))
        register_tuple_type (count, i);

  aarch64_init_simd_builtin_functions (true);
}

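/* Initialize all the AdvSIMD builtin types and functions, unless that has
   already been done.  */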
void
aarch64_init_simd_builtins (void)
{
  if (aarch64_simd_builtins_initialized_p)
    return;

  aarch64_simd_builtins_initialized_p = true;

  aarch64_init_simd_builtin_types ();

  /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
     Therefore we need to preserve the old __builtin scalar types.  It can be
     removed once all the intrinsics become strongly typed using the qualifier
     system.  */
  aarch64_init_simd_builtin_scalar_types ();

  aarch64_init_simd_builtin_functions (false);
  if (in_lto_p)
    handle_arm_neon_h ();

  /* Initialize the remaining fcmla_laneq intrinsics.  */
  aarch64_init_fcmla_laneq_builtins ();
}

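/* Add builtins for the CRC32 extension.  */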
static void
aarch64_init_crc32_builtins ()
{
  tree usi_type = aarch64_simd_builtin_std_type (SImode, qualifier_unsigned);
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
    {
      aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
      tree argtype = aarch64_simd_builtin_std_type (d->mode,
                                                    qualifier_unsigned);
      tree ftype = build_function_type_list (usi_type, usi_type, argtype,
                                             NULL_TREE);
      tree attrs = aarch64_get_attributes (FLAG_NONE, d->mode);
      tree fndecl
        = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}

/* Add builtins for reciprocal square root.  */

void
aarch64_init_builtin_rsqrt (void)
{
  tree fndecl = NULL;
  tree ftype = NULL;

  tree V2SF_type_node = build_vector_type (float_type_node, 2);
  tree V2DF_type_node = build_vector_type (double_type_node, 2);
  tree V4SF_type_node = build_vector_type (float_type_node, 4);

  struct builtin_decls_data
  {
    tree type_node;
    const char *builtin_name;
    int function_code;
  };

  builtin_decls_data bdda[] =
  {
    { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
    { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
    { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
    { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
    { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
  };

  builtin_decls_data *bdd = bdda;
  builtin_decls_data *bdd_end
    = bdd + (sizeof (bdda) / sizeof (builtin_decls_data));

  for (; bdd < bdd_end; bdd++)
    {
      ftype = build_function_type_list (bdd->type_node, bdd->type_node,
                                        NULL_TREE);
      tree attrs = aarch64_get_attributes (FLAG_FP, TYPE_MODE (bdd->type_node));
      fndecl = aarch64_general_add_builtin (bdd->builtin_name,
                                            ftype, bdd->function_code, attrs);
      aarch64_builtin_decls[bdd->function_code] = fndecl;
    }
}

/* Initialize the backend types that support the user-visible __fp16
   type, also initialize a pointer to that type, to be used when
   forming HFAs.  */

static void
aarch64_init_fp16_types (void)
{
  aarch64_fp16_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
  layout_type (aarch64_fp16_type_node);

  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
  aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
}

/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
static void
aarch64_init_bf16_types (void)
{
  aarch64_bf16_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (aarch64_bf16_type_node) = 16;
  SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
  layout_type (aarch64_bf16_type_node);

  lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
  aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
}

/* Pointer authentication builtins that will become NOPs on legacy
   platforms.  Currently, these builtins are for internal use only
   (libgcc EH unwinder).  */

void
aarch64_init_pauth_hint_builtins (void)
{
  /* Pointer Authentication builtins.  */
  tree ftype_pointer_auth
    = build_function_type_list (ptr_type_node, ptr_type_node,
                                unsigned_intDI_type_node, NULL_TREE);
  tree ftype_pointer_strip
    = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);

  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
                                   ftype_pointer_auth,
                                   AARCH64_PAUTH_BUILTIN_AUTIA1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
                                   ftype_pointer_auth,
                                   AARCH64_PAUTH_BUILTIN_PACIA1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
                                   ftype_pointer_auth,
                                   AARCH64_PAUTH_BUILTIN_AUTIB1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
                                   ftype_pointer_auth,
                                   AARCH64_PAUTH_BUILTIN_PACIB1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
    = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
                                   ftype_pointer_strip,
                                   AARCH64_PAUTH_BUILTIN_XPACLRI);
}

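/* For reference, from the unwinder's side the builtins registered above
   behave as if declared as (a sketch; the second argument is the
   modifier/salt, typically the frame's CFA during unwinding):

     void *__builtin_aarch64_autia1716 (void *ptr, uint64_t salt);
     void *__builtin_aarch64_xpaclri (void *ptr);  */
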
/* Initialize the transactional memory extension (TME) builtins.  */
static void
aarch64_init_tme_builtins (void)
{
  tree ftype_uint64_void
    = build_function_type_list (uint64_type_node, NULL);
  tree ftype_void_void
    = build_function_type_list (void_type_node, NULL);
  tree ftype_void_uint64
    = build_function_type_list (void_type_node, uint64_type_node, NULL);

  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
    = aarch64_general_add_builtin ("__builtin_aarch64_tstart",
                                   ftype_uint64_void,
                                   AARCH64_TME_BUILTIN_TSTART);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
    = aarch64_general_add_builtin ("__builtin_aarch64_ttest",
                                   ftype_uint64_void,
                                   AARCH64_TME_BUILTIN_TTEST);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
    = aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
                                   ftype_void_void,
                                   AARCH64_TME_BUILTIN_TCOMMIT);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
    = aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
                                   ftype_void_uint64,
                                   AARCH64_TME_BUILTIN_TCANCEL);
}

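/* The TME builtins registered above underpin the ACLE intrinsics
   (__tstart, __tcommit, __tcancel, __ttest) that arm_acle.h declares
   when TME is available; e.g. (a sketch):

     uint64_t status = __builtin_aarch64_tstart ();  */
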
/* Add builtins for Random Number instructions.  */

static void
aarch64_init_rng_builtins (void)
{
  tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
  tree ftype
    = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
    = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
                                   AARCH64_BUILTIN_RNG_RNDR);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
    = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
                                   AARCH64_BUILTIN_RNG_RNDRRS);
}

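/* Both RNG builtins take a pointer to a 64-bit value and return an int
   status, writing the random value through the pointer; they are the
   basis of the ACLE __rndr/__rndrrs intrinsics.  A sketch of a call:

     uint64_t val;
     int status = __builtin_aarch64_rndr (&val);  */
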
/* Initialize the memory tagging extension (MTE) builtins.  */
struct
{
  tree ftype;
  enum insn_code icode;
} aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
                              AARCH64_MEMTAG_BUILTIN_START - 1];

static void
aarch64_init_memtag_builtins (void)
{
  tree fntype = NULL;

#define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
  aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
    = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
                                   T, AARCH64_MEMTAG_BUILTIN_##F); \
  aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
                              AARCH64_MEMTAG_BUILTIN_START - 1] = \
    {T, CODE_FOR_##I};

  fntype = build_function_type_list (ptr_type_node, ptr_type_node,
                                     uint64_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);

  fntype = build_function_type_list (uint64_type_node, ptr_type_node,
                                     uint64_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);

  fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
                                     ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);

  fntype = build_function_type_list (ptr_type_node, ptr_type_node,
                                     unsigned_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);

  fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);

  fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);

#undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
}

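/* The builtins registered above back the ACLE MTE intrinsics declared
   in arm_acle.h (__arm_mte_create_random_tag and friends); e.g. the IRG
   entry registers __builtin_aarch64_memtag_irg with the function type
   void *(void *, uint64_t).  */
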
/* Add builtins for Load/store 64 Byte instructions.  */

typedef struct
{
  const char *name;
  unsigned int code;
  tree type;
} ls64_builtins_data;

static GTY(()) tree ls64_arm_data_t = NULL_TREE;

static void
aarch64_init_ls64_builtins_types (void)
{
  /* Synthesize:

     typedef struct {
       uint64_t val[8];
     } __arm_data512_t;  */
  const char *tuple_type_name = "__arm_data512_t";
  tree node_type = get_typenode_from_name (UINT64_TYPE);
  tree array_type = build_array_type_nelts (node_type, 8);
  SET_TYPE_MODE (array_type, V8DImode);

  gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
  gcc_assert (TYPE_ALIGN (array_type) == 64);

  tree field = build_decl (input_location, FIELD_DECL,
                           get_identifier ("val"), array_type);

  ls64_arm_data_t
    = lang_hooks.types.simulate_record_decl (input_location,
                                             tuple_type_name,
                                             make_array_slice (&field, 1));

  gcc_assert (TYPE_MODE (ls64_arm_data_t) == V8DImode);
  gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));
  gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == 64);
}

static void
aarch64_init_ls64_builtins (void)
{
  aarch64_init_ls64_builtins_types ();

  ls64_builtins_data data[4] = {
    {"__builtin_aarch64_ld64b", AARCH64_LS64_BUILTIN_LD64B,
     build_function_type_list (ls64_arm_data_t,
                               const_ptr_type_node, NULL_TREE)},
    {"__builtin_aarch64_st64b", AARCH64_LS64_BUILTIN_ST64B,
     build_function_type_list (void_type_node, ptr_type_node,
                               ls64_arm_data_t, NULL_TREE)},
    {"__builtin_aarch64_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
     build_function_type_list (uint64_type_node, ptr_type_node,
                               ls64_arm_data_t, NULL_TREE)},
    {"__builtin_aarch64_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
     build_function_type_list (uint64_type_node, ptr_type_node,
                               ls64_arm_data_t, NULL_TREE)},
  };

  for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
    aarch64_builtin_decls[data[i].code]
      = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code);
}

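/* Via the pragma handler below, these builtins surface through
   arm_acle.h as the __arm_ld64b/__arm_st64b/__arm_st64bv/__arm_st64bv0
   intrinsics; e.g. (a sketch):

     __arm_data512_t data = __builtin_aarch64_ld64b (addr);  */
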
/* Implement #pragma GCC aarch64 "arm_acle.h".  */
void
handle_arm_acle_h (void)
{
  if (TARGET_LS64)
    aarch64_init_ls64_builtins ();
}

/* Initialize the FPSR and FPCR getters and setters.  */

static void
aarch64_init_fpsr_fpcr_builtins (void)
{
  tree ftype_set
    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
  tree ftype_get
    = build_function_type_list (unsigned_type_node, NULL);

  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
                                   ftype_get,
                                   AARCH64_BUILTIN_GET_FPCR);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
                                   ftype_set,
                                   AARCH64_BUILTIN_SET_FPCR);
  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
                                   ftype_get,
                                   AARCH64_BUILTIN_GET_FPSR);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
                                   ftype_set,
                                   AARCH64_BUILTIN_SET_FPSR);

  ftype_set
    = build_function_type_list (void_type_node, long_long_unsigned_type_node,
                                NULL);
  ftype_get
    = build_function_type_list (long_long_unsigned_type_node, NULL);

  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr64",
                                   ftype_get,
                                   AARCH64_BUILTIN_GET_FPCR64);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr64",
                                   ftype_set,
                                   AARCH64_BUILTIN_SET_FPCR64);
  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr64",
                                   ftype_get,
                                   AARCH64_BUILTIN_GET_FPSR64);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr64",
                                   ftype_set,
                                   AARCH64_BUILTIN_SET_FPSR64);
}

/* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group.  */

void
aarch64_general_init_builtins (void)
{
  aarch64_init_fpsr_fpcr_builtins ();

  aarch64_init_fp16_types ();

  aarch64_init_bf16_types ();

  {
    aarch64_simd_switcher simd;
    aarch64_init_simd_builtins ();
  }

  aarch64_init_crc32_builtins ();
  aarch64_init_builtin_rsqrt ();
  aarch64_init_rng_builtins ();

  tree ftype_jcvt
    = build_function_type_list (intSI_type_node, double_type_node, NULL);
  aarch64_builtin_decls[AARCH64_JSCVT]
    = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
                                   AARCH64_JSCVT);

  /* Initialize pointer authentication builtins which are backed by
     instructions in NOP encoding space.

     NOTE: these builtins are supposed to be used by the libgcc unwinder
     only.  Since return address signing is not supported under ILP32,
     we don't register them there.  */
  if (!TARGET_ILP32)
    aarch64_init_pauth_hint_builtins ();

  if (TARGET_TME)
    aarch64_init_tme_builtins ();

  if (TARGET_MEMTAG)
    aarch64_init_memtag_builtins ();
}

/* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
tree
aarch64_general_builtin_decl (unsigned code, bool)
{
  if (code >= AARCH64_BUILTIN_MAX)
    return error_mark_node;

  return aarch64_builtin_decls[code];
}

typedef enum
{
  SIMD_ARG_COPY_TO_REG,
  SIMD_ARG_CONSTANT,
  SIMD_ARG_LANE_INDEX,
  SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
  SIMD_ARG_LANE_PAIR_INDEX,
  SIMD_ARG_LANE_QUADTUP_INDEX,
  SIMD_ARG_STOP
} builtin_simd_arg;

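/* Expand a call EXP to a SIMD builtin with insn code ICODE.  ARGS
   describes how each argument must be prepared (copied to a register,
   validated as a constant, or range-checked as a lane index) and is
   terminated by SIMD_ARG_STOP; HAVE_RETVAL is nonzero if operand 0 of
   ICODE is a result.  Return the result rtx, or const0_rtx after
   reporting an invalid constant argument.  */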
static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
                          tree exp, builtin_simd_arg *args,
                          machine_mode builtin_mode)
{
  rtx pat;
  rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand.  */
  int opc = 0;

  if (have_retval)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      op[opc++] = target;
    }

  for (;;)
    {
      builtin_simd_arg thisarg = args[opc - have_retval];

      if (thisarg == SIMD_ARG_STOP)
        break;
      else
        {
          tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
          machine_mode mode = insn_data[icode].operand[opc].mode;
          op[opc] = expand_normal (arg);

          switch (thisarg)
            {
            case SIMD_ARG_COPY_TO_REG:
              if (POINTER_TYPE_P (TREE_TYPE (arg)))
                op[opc] = convert_memory_address (Pmode, op[opc]);
              /*gcc_assert (GET_MODE (op[opc]) == mode); */
              if (!(*insn_data[icode].operand[opc].predicate)
                  (op[opc], mode))
                op[opc] = copy_to_mode_reg (mode, op[opc]);
              break;

            case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
              gcc_assert (opc > 1);
              if (CONST_INT_P (op[opc]))
                {
                  unsigned int nunits
                    = GET_MODE_NUNITS (builtin_mode).to_constant ();
                  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
                  /* Keep to GCC-vector-extension lane indices in the RTL.  */
                  op[opc] = aarch64_endian_lane_rtx (builtin_mode,
                                                     INTVAL (op[opc]));
                }
              goto constant_arg;

            case SIMD_ARG_LANE_INDEX:
              /* Must be a previous operand into which this is an index.  */
              gcc_assert (opc > 0);
              if (CONST_INT_P (op[opc]))
                {
                  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
                  unsigned int nunits
                    = GET_MODE_NUNITS (vmode).to_constant ();
                  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
                  /* Keep to GCC-vector-extension lane indices in the RTL.  */
                  op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
                }
              /* If the lane index isn't a constant then error out.  */
              goto constant_arg;

            case SIMD_ARG_LANE_PAIR_INDEX:
              /* Must be a previous operand into which this is an index and
                 index is restricted to nunits / 2.  */
              gcc_assert (opc > 0);
              if (CONST_INT_P (op[opc]))
                {
                  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
                  unsigned int nunits
                    = GET_MODE_NUNITS (vmode).to_constant ();
                  aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
                  /* Keep to GCC-vector-extension lane indices in the RTL.  */
                  int lane = INTVAL (op[opc]);
                  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
                                          SImode);
                }
              /* If the lane index isn't a constant then error out.  */
              goto constant_arg;
            case SIMD_ARG_LANE_QUADTUP_INDEX:
              /* Must be a previous operand into which this is an index and
                 index is restricted to nunits / 4.  */
              gcc_assert (opc > 0);
              if (CONST_INT_P (op[opc]))
                {
                  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
                  unsigned int nunits
                    = GET_MODE_NUNITS (vmode).to_constant ();
                  aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
                  /* Keep to GCC-vector-extension lane indices in the RTL.  */
                  int lane = INTVAL (op[opc]);
                  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
                                          SImode);
                }
              /* If the lane index isn't a constant then error out.  */
              goto constant_arg;
            case SIMD_ARG_CONSTANT:
            constant_arg:
              if (!(*insn_data[icode].operand[opc].predicate)
                  (op[opc], mode))
                {
                  error_at (EXPR_LOCATION (exp),
                            "argument %d must be a constant immediate",
                            opc + 1 - have_retval);
                  return const0_rtx;
                }
              break;

            case SIMD_ARG_STOP:
              gcc_unreachable ();
            }

          opc++;
        }
    }

  switch (opc)
    {
    case 1:
      pat = GEN_FCN (icode) (op[0]);
      break;

    case 2:
      pat = GEN_FCN (icode) (op[0], op[1]);
      break;

    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;

    case 4:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;

    case 5:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;

    case 6:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;

    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return target;
}

/* Expand an AArch64 AdvSIMD builtin (intrinsic).  */
rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
{
  if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
    {
      rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
      rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
      if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
          && UINTVAL (elementsize) != 0
          && UINTVAL (totalsize) != 0)
        {
          rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
          if (CONST_INT_P (lane_idx))
            aarch64_simd_lane_bounds (lane_idx, 0,
                                      UINTVAL (totalsize)
                                      / UINTVAL (elementsize),
                                      exp);
          else
            error_at (EXPR_LOCATION (exp),
                      "lane index must be a constant immediate");
        }
      else
        error_at (EXPR_LOCATION (exp),
                  "total size and element size must be a nonzero "
                  "constant immediate");
      /* Don't generate any RTL.  */
      return const0_rtx;
    }
  aarch64_simd_builtin_datum *d
    = &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
  enum insn_code icode = d->code;
  builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
  int num_args = insn_data[d->code].n_operands;
  int is_void = 0;
  int k;

  is_void = !!(d->qualifiers[0] & qualifier_void);

  num_args += is_void;

  for (k = 1; k < num_args; k++)
    {
      /* We have four arrays of data, each indexed in a different fashion.
         qualifiers - element 0 always describes the function return type.
         operands - element 0 is either the operand for return value (if
         the function has a non-void return type) or the operand for the
         first argument.
         expr_args - element 0 always holds the first argument.
         args - element 0 is always used for the return type.  */
      int qualifiers_k = k;
      int operands_k = k - is_void;
      int expr_args_k = k - 1;

      if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
        args[k] = SIMD_ARG_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
        args[k] = SIMD_ARG_LANE_PAIR_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
        args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
        args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
        args[k] = SIMD_ARG_CONSTANT;
      else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
        {
          rtx arg
            = expand_normal (CALL_EXPR_ARG (exp,
                                            (expr_args_k)));
          /* Handle constants only if the predicate allows it.  */
          bool op_const_int_p
            = (CONST_INT_P (arg)
               && (*insn_data[icode].operand[operands_k].predicate)
                  (arg, insn_data[icode].operand[operands_k].mode));
          args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
        }
      else
        args[k] = SIMD_ARG_COPY_TO_REG;
    }
  args[k] = SIMD_ARG_STOP;

  /* The interface to aarch64_simd_expand_args expects a 0 if
     the function is void, and a 1 if it is not.  */
  return aarch64_simd_expand_args
           (target, icode, !is_void, exp, &args[1], d->mode);
}

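/* Expand a call EXP to a CRC32 builtin with code FCODE, emitting the
   instruction through the insn code recorded in aarch64_crc_builtin_data
   and putting the result in TARGET if that is convenient.  */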
rtx
aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
{
  rtx pat;
  aarch64_crc_builtin_datum *d
    = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
  enum insn_code icode = d->icode;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
              && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Function to expand reciprocal square root builtins.  */

static rtx
aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);

  rtx (*gen) (rtx, rtx);

  switch (fcode)
    {
    case AARCH64_BUILTIN_RSQRT_DF:
      gen = gen_rsqrtdf2;
      break;
    case AARCH64_BUILTIN_RSQRT_SF:
      gen = gen_rsqrtsf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2DF:
      gen = gen_rsqrtv2df2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2SF:
      gen = gen_rsqrtv2sf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V4SF:
      gen = gen_rsqrtv4sf2;
      break;
    default: gcc_unreachable ();
    }

  if (!target)
    target = gen_reg_rtx (GET_MODE (op0));

  emit_insn (gen (target, op0));

  return target;
}

/* Expand a FCMLA lane expression EXP with code FCODE and
   result going to TARGET if that is convenient.  */

rtx
aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
{
  int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
  aarch64_fcmla_laneq_builtin_datum* d
    = &aarch64_fcmla_lane_builtin_data[bcode];
  machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
  rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
  rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
  rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
  tree tmp = CALL_EXPR_ARG (exp, 3);
  rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);

  /* Validate that the lane index is a constant.  */
  if (!CONST_INT_P (lane_idx))
    {
      error_at (EXPR_LOCATION (exp),
                "argument %d must be a constant immediate", 4);
      return const0_rtx;
    }

  /* Validate that the index is within the expected range.  */
  int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
  aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);

  /* Generate the correct register and mode.  */
  int lane = INTVAL (lane_idx);

  if (lane < nunits / 4)
    op2 = simplify_gen_subreg (d->mode, op2, quadmode,
                               subreg_lowpart_offset (d->mode, quadmode));
  else
    {
      /* Select the upper 64 bits, either a V2SF or V4HF.  This is
         quite messy: even though the operation required is simple, it
         doesn't have a simple RTL pattern, and it seems hard to define
         one with a single RTL pattern.  The target-generic version
         gen_highpart_mode generates code that isn't optimal.  */
      rtx temp1 = gen_reg_rtx (d->mode);
      rtx temp2 = gen_reg_rtx (DImode);
      temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
                                   subreg_lowpart_offset (d->mode, quadmode));
      temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
      if (BYTES_BIG_ENDIAN)
        emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
      else
        emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
      op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);

      /* And recalculate the index.  */
      lane -= nunits / 4;
    }

  /* Keep to GCC-vector-extension lane indices in the RTL; only nunits / 4
     (the max nunits in the range check) are valid, which means only 0-1,
     so we only need to know the order in a V2mode.  */
  lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);

  if (!target
      || !REG_P (target)
      || GET_MODE (target) != d->mode)
    target = gen_reg_rtx (d->mode);

  rtx pat = NULL_RTX;

  if (d->lane)
    pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
  else
    pat = GEN_FCN (d->icode) (target, op0, op1, op2);

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Function to expand an expression EXP which calls one of the Transactional
   Memory Extension (TME) builtins FCODE with the result going to TARGET.  */
static rtx
aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
{
  switch (fcode)
    {
    case AARCH64_TME_BUILTIN_TSTART:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
      break;

    case AARCH64_TME_BUILTIN_TTEST:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
      break;

    case AARCH64_TME_BUILTIN_TCOMMIT:
      emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
      break;

    case AARCH64_TME_BUILTIN_TCANCEL:
      {
        tree arg0 = CALL_EXPR_ARG (exp, 0);
        rtx op0 = expand_normal (arg0);
        if (CONST_INT_P (op0) && UINTVAL (op0) <= 65535)
          emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
        else
          {
            error_at (EXPR_LOCATION (exp),
                      "argument must be a 16-bit constant immediate");
            return const0_rtx;
          }
      }
      break;

    default:
      gcc_unreachable ();
    }
  return target;
}

/* Function to expand an expression EXP which calls one of the Load/Store
   64 Byte extension (LS64) builtins FCODE with the result going to TARGET.  */
static rtx
aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target)
{
  expand_operand ops[3];

  switch (fcode)
    {
    case AARCH64_LS64_BUILTIN_LD64B:
      {
        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        create_output_operand (&ops[0], target, V8DImode);
        create_input_operand (&ops[1], op0, DImode);
        expand_insn (CODE_FOR_ld64b, 2, ops);
        return ops[0].value;
      }
    case AARCH64_LS64_BUILTIN_ST64B:
      {
        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
        create_output_operand (&ops[0], op0, DImode);
        create_input_operand (&ops[1], op1, V8DImode);
        expand_insn (CODE_FOR_st64b, 2, ops);
        return const0_rtx;
      }
    case AARCH64_LS64_BUILTIN_ST64BV:
      {
        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
        create_output_operand (&ops[0], target, DImode);
        create_input_operand (&ops[1], op0, DImode);
        create_input_operand (&ops[2], op1, V8DImode);
        expand_insn (CODE_FOR_st64bv, 3, ops);
        return ops[0].value;
      }
    case AARCH64_LS64_BUILTIN_ST64BV0:
      {
        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
        create_output_operand (&ops[0], target, DImode);
        create_input_operand (&ops[1], op0, DImode);
        create_input_operand (&ops[2], op1, V8DImode);
        expand_insn (CODE_FOR_st64bv0, 3, ops);
        return ops[0].value;
      }
    }

  gcc_unreachable ();
}

/* Expand a random number builtin EXP with code FCODE, putting the result
   in TARGET.  If IGNORE is true the return value is ignored.  */

rtx
aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
{
  rtx pat;
  enum insn_code icode;
  if (fcode == AARCH64_BUILTIN_RNG_RNDR)
    icode = CODE_FOR_aarch64_rndr;
  else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
    icode = CODE_FOR_aarch64_rndrrs;
  else
    gcc_unreachable ();

  rtx rand = gen_reg_rtx (DImode);
  pat = GEN_FCN (icode) (rand);
  if (!pat)
    return NULL_RTX;

  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx res_addr = expand_normal (arg0);
  res_addr = convert_memory_address (Pmode, res_addr);
  rtx res_mem = gen_rtx_MEM (DImode, res_addr);
  emit_insn (pat);
  emit_move_insn (res_mem, rand);
  /* If the status result is unused don't generate the CSET code.  */
  if (ignore)
    return target;

  rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
  rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
  return target;
}

/* Expand an expression EXP that calls a MEMTAG built-in FCODE
   with result going to TARGET.  */
static rtx
aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
{
  if (TARGET_ILP32)
    {
      error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");
      return const0_rtx;
    }

  rtx pat = NULL;
  enum insn_code icode = aarch64_memtag_builtin_data[fcode -
                           AARCH64_MEMTAG_BUILTIN_START - 1].icode;

  rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
  machine_mode mode0 = GET_MODE (op0);
  op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0);
  op0 = convert_to_mode (DImode, op0, true);

  switch (fcode)
    {
    case AARCH64_MEMTAG_BUILTIN_IRG:
    case AARCH64_MEMTAG_BUILTIN_GMI:
    case AARCH64_MEMTAG_BUILTIN_SUBP:
    case AARCH64_MEMTAG_BUILTIN_INC_TAG:
      {
        if (! target
            || GET_MODE (target) != DImode
            || ! (*insn_data[icode].operand[0].predicate) (target, DImode))
          target = gen_reg_rtx (DImode);

        if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
          {
            rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));

            if ((*insn_data[icode].operand[3].predicate) (op1, QImode))
              {
                pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
                break;
              }
            error_at (EXPR_LOCATION (exp),
                      "argument %d must be a constant immediate "
                      "in range [0,15]", 2);
            return const0_rtx;
          }
        else
          {
            rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
            machine_mode mode1 = GET_MODE (op1);
            op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1);
            op1 = convert_to_mode (DImode, op1, true);
            pat = GEN_FCN (icode) (target, op0, op1);
          }
        break;
      }
    case AARCH64_MEMTAG_BUILTIN_GET_TAG:
      target = op0;
      pat = GEN_FCN (icode) (target, op0, const0_rtx);
      break;
    case AARCH64_MEMTAG_BUILTIN_SET_TAG:
      pat = GEN_FCN (icode) (op0, op0, const0_rtx);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Expand an expression EXP as an FPSR or FPCR setter (depending on
   UNSPEC) using MODE.  */
static void
aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp)
{
  tree arg = CALL_EXPR_ARG (exp, 0);
  rtx op = force_reg (mode, expand_normal (arg));
  emit_insn (gen_aarch64_set (unspec, mode, op));
}

/* Expand an FPSR or FPCR getter (depending on ICODE) using MODE.
   Return the target.  */
static rtx
aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode,
                                 rtx target)
{
  expand_operand op;
  create_output_operand (&op, target, mode);
  expand_insn (icode, 1, &op);
  return op.value;
}

/* Expand an expression EXP that calls built-in function FCODE,
   with result going to TARGET if that's convenient.  IGNORE is true
   if the result of the builtin is ignored.  */
rtx
aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
                                int ignore)
{
  int icode;
  rtx op0;
  tree arg0;

  switch (fcode)
    {
    case AARCH64_BUILTIN_GET_FPCR:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi,
                                              SImode, target);
    case AARCH64_BUILTIN_SET_FPCR:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp);
      return target;
    case AARCH64_BUILTIN_GET_FPSR:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi,
                                              SImode, target);
    case AARCH64_BUILTIN_SET_FPSR:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp);
      return target;
    case AARCH64_BUILTIN_GET_FPCR64:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi,
                                              DImode, target);
    case AARCH64_BUILTIN_SET_FPCR64:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp);
      return target;
    case AARCH64_BUILTIN_GET_FPSR64:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi,
                                              DImode, target);
    case AARCH64_BUILTIN_SET_FPSR64:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp);
      return target;
    case AARCH64_PAUTH_BUILTIN_AUTIA1716:
    case AARCH64_PAUTH_BUILTIN_PACIA1716:
    case AARCH64_PAUTH_BUILTIN_AUTIB1716:
    case AARCH64_PAUTH_BUILTIN_PACIB1716:
    case AARCH64_PAUTH_BUILTIN_XPACLRI:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = force_reg (Pmode, expand_normal (arg0));

      if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
        {
          rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
          icode = CODE_FOR_xpaclri;
          emit_move_insn (lr, op0);
          emit_insn (GEN_FCN (icode) ());
          return lr;
        }
      else
        {
          tree arg1 = CALL_EXPR_ARG (exp, 1);
          rtx op1 = force_reg (Pmode, expand_normal (arg1));
          switch (fcode)
            {
            case AARCH64_PAUTH_BUILTIN_AUTIA1716:
              icode = CODE_FOR_autia1716;
              break;
            case AARCH64_PAUTH_BUILTIN_AUTIB1716:
              icode = CODE_FOR_autib1716;
              break;
            case AARCH64_PAUTH_BUILTIN_PACIA1716:
              icode = CODE_FOR_pacia1716;
              break;
            case AARCH64_PAUTH_BUILTIN_PACIB1716:
              icode = CODE_FOR_pacib1716;
              break;
            default:
              icode = 0;
              gcc_unreachable ();
            }

          rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
          rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
          emit_move_insn (x17_reg, op0);
          emit_move_insn (x16_reg, op1);
          emit_insn (GEN_FCN (icode) ());
          return x17_reg;
        }

    case AARCH64_JSCVT:
      {
        expand_operand ops[2];
        create_output_operand (&ops[0], target, SImode);
        op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        create_input_operand (&ops[1], op0, DFmode);
        expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
        return ops[0].value;
      }

    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
      return aarch64_expand_fcmla_builtin (exp, target, fcode);
    case AARCH64_BUILTIN_RNG_RNDR:
    case AARCH64_BUILTIN_RNG_RNDRRS:
      return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
    }

  if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
    return aarch64_simd_expand_builtin (fcode, exp, target);
  else if (fcode >= AARCH64_CRC32_BUILTIN_BASE
           && fcode <= AARCH64_CRC32_BUILTIN_MAX)
    return aarch64_crc32_expand_builtin (fcode, exp, target);

  if (fcode == AARCH64_BUILTIN_RSQRT_DF
      || fcode == AARCH64_BUILTIN_RSQRT_SF
      || fcode == AARCH64_BUILTIN_RSQRT_V2DF
      || fcode == AARCH64_BUILTIN_RSQRT_V2SF
      || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
    return aarch64_expand_builtin_rsqrt (fcode, exp, target);

  if (fcode == AARCH64_TME_BUILTIN_TSTART
      || fcode == AARCH64_TME_BUILTIN_TCOMMIT
      || fcode == AARCH64_TME_BUILTIN_TTEST
      || fcode == AARCH64_TME_BUILTIN_TCANCEL)
    return aarch64_expand_builtin_tme (fcode, exp, target);

  if (fcode == AARCH64_LS64_BUILTIN_LD64B
      || fcode == AARCH64_LS64_BUILTIN_ST64B
      || fcode == AARCH64_LS64_BUILTIN_ST64BV
      || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
    return aarch64_expand_builtin_ls64 (fcode, exp, target);

  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_expand_builtin_memtag (fcode, exp, target);

  gcc_unreachable ();
}

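/* Return the decl of an AdvSIMD builtin that implements the vectorized
   form of the scalar function FN for output vector type TYPE_OUT and
   input vector type TYPE_IN, or NULL_TREE if there is none.  */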
tree
aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
                                     tree type_in)
{
  machine_mode in_mode, out_mode;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (type_out);
  in_mode = TYPE_MODE (type_in);

#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
#define AARCH64_FIND_FRINT_VARIANT(N) \
  (AARCH64_CHECK_BUILTIN_MODE (2, D) \
   ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
   : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
      ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
      : (AARCH64_CHECK_BUILTIN_MODE (2, S) \
         ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
         : NULL_TREE)))
  switch (fn)
    {
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode)
    CASE_CFN_FLOOR:
      return AARCH64_FIND_FRINT_VARIANT (floor);
    CASE_CFN_CEIL:
      return AARCH64_FIND_FRINT_VARIANT (ceil);
    CASE_CFN_TRUNC:
      return AARCH64_FIND_FRINT_VARIANT (btrunc);
    CASE_CFN_ROUND:
      return AARCH64_FIND_FRINT_VARIANT (round);
    CASE_CFN_NEARBYINT:
      return AARCH64_FIND_FRINT_VARIANT (nearbyint);
    CASE_CFN_SQRT:
      return AARCH64_FIND_FRINT_VARIANT (sqrt);
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##SImode && in_mode == V##C##N##Imode)
    CASE_CFN_CLZ:
      {
        if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
        return NULL_TREE;
      }
    CASE_CFN_CTZ:
      {
        if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
        return NULL_TREE;
      }
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode)
    CASE_CFN_IFLOOR:
    CASE_CFN_LFLOOR:
    CASE_CFN_LLFLOOR:
      {
        enum aarch64_builtins builtin;
        if (AARCH64_CHECK_BUILTIN_MODE (2, D))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
        else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
        else
          return NULL_TREE;

        return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_ICEIL:
    CASE_CFN_LCEIL:
    CASE_CFN_LLCEIL:
      {
        enum aarch64_builtins builtin;
        if (AARCH64_CHECK_BUILTIN_MODE (2, D))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
        else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
        else
          return NULL_TREE;

        return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_IROUND:
    CASE_CFN_LROUND:
    CASE_CFN_LLROUND:
      {
        enum aarch64_builtins builtin;
        if (AARCH64_CHECK_BUILTIN_MODE (2, D))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
        else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
        else
          return NULL_TREE;

        return aarch64_builtin_decls[builtin];
      }
    default:
      return NULL_TREE;
    }

  return NULL_TREE;
}

/* Return builtin for reciprocal square root.  */

tree
aarch64_general_builtin_rsqrt (unsigned int fn)
{
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
  return NULL_TREE;
}

/* Return true if the lane check can be removed, as no error is
   going to be emitted.  */
static bool
aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
{
  if (TREE_CODE (arg0) != INTEGER_CST)
    return false;
  if (TREE_CODE (arg1) != INTEGER_CST)
    return false;
  if (TREE_CODE (arg2) != INTEGER_CST)
    return false;

  auto totalsize = wi::to_widest (arg0);
  auto elementsize = wi::to_widest (arg1);
  if (totalsize == 0 || elementsize == 0)
    return false;
  auto lane = wi::to_widest (arg2);
  auto high = wi::udiv_trunc (totalsize, elementsize);
  return wi::ltu_p (lane, high);
}

#undef VAR1
#define VAR1(T, N, MAP, FLAG, A) \
  case AARCH64_SIMD_BUILTIN_##T##_##N##A:

/* Try to fold a call to the built-in function with subcode FCODE.  The
   function is passed the N_ARGS arguments in ARGS and it returns a value
   of type TYPE.  Return the new expression on success and NULL_TREE on
   failure.  */
tree
aarch64_general_fold_builtin (unsigned int fcode, tree type,
                              unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
{
  switch (fcode)
    {
      BUILTIN_VDQF (UNOP, abs, 2, ALL)
        return fold_build1 (ABS_EXPR, type, args[0]);
      VAR1 (UNOP, floatv2si, 2, ALL, v2sf)
      VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
      VAR1 (UNOP, floatv2di, 2, ALL, v2df)
        return fold_build1 (FLOAT_EXPR, type, args[0]);
    case AARCH64_SIMD_BUILTIN_LANE_CHECK:
      gcc_assert (n_args == 3);
      if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
        return void_node;
      break;
    default:
      break;
    }

  return NULL_TREE;
}

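/* Return the aarch64_simd_type describing the memory access performed by
   the LD1/ST1 builtin with subcode FCODE; used when lowering those
   builtins to plain GIMPLE memory references below.  */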
enum aarch64_simd_type
get_mem_type_for_load_store (unsigned int fcode)
{
  switch (fcode)
    {
      VAR1 (LOAD1, ld1, 0, LOAD, v8qi)
      VAR1 (STORE1, st1, 0, STORE, v8qi)
        return Int8x8_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v16qi)
      VAR1 (STORE1, st1, 0, STORE, v16qi)
        return Int8x16_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v4hi)
      VAR1 (STORE1, st1, 0, STORE, v4hi)
        return Int16x4_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v8hi)
      VAR1 (STORE1, st1, 0, STORE, v8hi)
        return Int16x8_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v2si)
      VAR1 (STORE1, st1, 0, STORE, v2si)
        return Int32x2_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v4si)
      VAR1 (STORE1, st1, 0, STORE, v4si)
        return Int32x4_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v2di)
      VAR1 (STORE1, st1, 0, STORE, v2di)
        return Int64x2_t;
      VAR1 (LOAD1_U, ld1, 0, LOAD, v8qi)
      VAR1 (STORE1_U, st1, 0, STORE, v8qi)
        return Uint8x8_t;
      VAR1 (LOAD1_U, ld1, 0, LOAD, v16qi)
      VAR1 (STORE1_U, st1, 0, STORE, v16qi)
        return Uint8x16_t;
      VAR1 (LOAD1_U, ld1, 0, LOAD, v4hi)
      VAR1 (STORE1_U, st1, 0, STORE, v4hi)
        return Uint16x4_t;
      VAR1 (LOAD1_U, ld1, 0, LOAD, v8hi)
      VAR1 (STORE1_U, st1, 0, STORE, v8hi)
        return Uint16x8_t;
      VAR1 (LOAD1_U, ld1, 0, LOAD, v2si)
      VAR1 (STORE1_U, st1, 0, STORE, v2si)
        return Uint32x2_t;
      VAR1 (LOAD1_U, ld1, 0, LOAD, v4si)
      VAR1 (STORE1_U, st1, 0, STORE, v4si)
        return Uint32x4_t;
      VAR1 (LOAD1_U, ld1, 0, LOAD, v2di)
      VAR1 (STORE1_U, st1, 0, STORE, v2di)
        return Uint64x2_t;
      VAR1 (LOAD1_P, ld1, 0, LOAD, v8qi)
      VAR1 (STORE1_P, st1, 0, STORE, v8qi)
        return Poly8x8_t;
      VAR1 (LOAD1_P, ld1, 0, LOAD, v16qi)
      VAR1 (STORE1_P, st1, 0, STORE, v16qi)
        return Poly8x16_t;
      VAR1 (LOAD1_P, ld1, 0, LOAD, v4hi)
      VAR1 (STORE1_P, st1, 0, STORE, v4hi)
        return Poly16x4_t;
      VAR1 (LOAD1_P, ld1, 0, LOAD, v8hi)
      VAR1 (STORE1_P, st1, 0, STORE, v8hi)
        return Poly16x8_t;
      VAR1 (LOAD1_P, ld1, 0, LOAD, v2di)
      VAR1 (STORE1_P, st1, 0, STORE, v2di)
        return Poly64x2_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v4hf)
      VAR1 (STORE1, st1, 0, STORE, v4hf)
        return Float16x4_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v8hf)
      VAR1 (STORE1, st1, 0, STORE, v8hf)
        return Float16x8_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v4bf)
      VAR1 (STORE1, st1, 0, STORE, v4bf)
        return Bfloat16x4_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v8bf)
      VAR1 (STORE1, st1, 0, STORE, v8bf)
        return Bfloat16x8_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v2sf)
      VAR1 (STORE1, st1, 0, STORE, v2sf)
        return Float32x2_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v4sf)
      VAR1 (STORE1, st1, 0, STORE, v4sf)
        return Float32x4_t;
      VAR1 (LOAD1, ld1, 0, LOAD, v2df)
      VAR1 (STORE1, st1, 0, STORE, v2df)
        return Float64x2_t;
    default:
      gcc_unreachable ();
      break;
    }
}

/* Try to fold STMT, given that it's a call to the built-in function with
   subcode FCODE.  Return the new statement on success and null on
   failure.  */
gimple *
aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
                                     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
{
  gimple *new_stmt = NULL;
  unsigned nargs = gimple_call_num_args (stmt);
  tree *args = (nargs > 0
                ? gimple_call_arg_ptr (stmt, 0)
                : &error_mark_node);

  /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int
     and unsigned int; it will distinguish according to the types of
     the arguments to the __builtin.  */
  switch (fcode)
    {
      BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL)
        new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
                                               1, args[0]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
        break;

      /* Lower sqrt builtins to gimple/internal function sqrt.  */
      BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
        new_stmt = gimple_build_call_internal (IFN_SQRT,
                                               1, args[0]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
        break;

      /* Lower store and load neon builtins to gimple.  */
      BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
      BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
      BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD)
        if (!BYTES_BIG_ENDIAN)
          {
            enum aarch64_simd_type mem_type
              = get_mem_type_for_load_store (fcode);
            aarch64_simd_type_info simd_type
              = aarch64_simd_types[mem_type];
            tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
                                                             VOIDmode, true);
            tree zero = build_zero_cst (elt_ptr_type);
            /* Use element type alignment.  */
            tree access_type
              = build_aligned_type (simd_type.itype,
                                    TYPE_ALIGN (simd_type.eltype));
            new_stmt
              = gimple_build_assign (gimple_get_lhs (stmt),
                                     fold_build2 (MEM_REF,
                                                  access_type,
                                                  args[0], zero));
          }
        break;

      BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
      BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE)
      BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE)
        if (!BYTES_BIG_ENDIAN)
          {
            enum aarch64_simd_type mem_type
              = get_mem_type_for_load_store (fcode);
            aarch64_simd_type_info simd_type
              = aarch64_simd_types[mem_type];
            tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
                                                             VOIDmode, true);
            tree zero = build_zero_cst (elt_ptr_type);
            /* Use element type alignment.  */
            tree access_type
              = build_aligned_type (simd_type.itype,
                                    TYPE_ALIGN (simd_type.eltype));
            new_stmt
              = gimple_build_assign (fold_build2 (MEM_REF, access_type,
                                                  args[0], zero),
                                     args[1]);
          }
        break;

      BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL)
        new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
                                               1, args[0]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
        break;
      BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10, ALL)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10, ALL)
        new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
                                               1, args[0]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
        break;
      BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, NONE)
        if (TREE_CODE (args[1]) == INTEGER_CST
            && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
          new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                          LSHIFT_EXPR, args[0], args[1]);
        break;
      BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, NONE)
      BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, NONE)
        {
          tree cst = args[1];
          tree ctype = TREE_TYPE (cst);
          /* Left shifts can be either scalar or vector, e.g. uint64x1_t is
             treated as a scalar type, not a vector one.  */
          if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
            {
              wide_int wcst = wi::to_wide (cst);
              tree unit_ty = TREE_TYPE (cst);

              wide_int abs_cst = wi::abs (wcst);
              if (wi::geu_p (abs_cst, element_precision (args[0])))
                break;

              if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
                {
                  tree final_cst;
                  final_cst = wide_int_to_tree (unit_ty, abs_cst);
                  if (TREE_CODE (cst) != INTEGER_CST)
                    final_cst = build_uniform_cst (ctype, final_cst);

                  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                  RSHIFT_EXPR, args[0],
                                                  final_cst);
                }
              else
                new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                LSHIFT_EXPR, args[0], args[1]);
            }
        }
        break;
      BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
      VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
      BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
      VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
        if (TREE_CODE (args[1]) == INTEGER_CST
            && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
          new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                          RSHIFT_EXPR, args[0], args[1]);
        break;
      BUILTIN_GPF (BINOP, fmulx, 0, ALL)
        {
          gcc_assert (nargs == 2);
          bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
          bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
          if (a0_cst_p || a1_cst_p)
            {
              if (a0_cst_p && a1_cst_p)
                {
                  tree t0 = TREE_TYPE (args[0]);
                  real_value a0 = (TREE_REAL_CST (args[0]));
                  real_value a1 = (TREE_REAL_CST (args[1]));
                  if (real_equal (&a1, &dconst0))
                    std::swap (a0, a1);
                  /* According to real_equal (), +0 equals -0.  */
                  if (real_equal (&a0, &dconst0) && real_isinf (&a1))
                    {
                      real_value res = dconst2;
                      res.sign = a0.sign ^ a1.sign;
                      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                      REAL_CST,
                                                      build_real (t0, res));
                    }
                  else
                    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                    MULT_EXPR,
                                                    args[0], args[1]);
                }
              else /* a0_cst_p ^ a1_cst_p.  */
                {
                  real_value const_part = a0_cst_p
                    ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
                  if (!real_equal (&const_part, &dconst0)
                      && !real_isinf (&const_part))
                    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                    MULT_EXPR, args[0],
                                                    args[1]);
                }
            }
          if (new_stmt)
            {
              gimple_set_vuse (new_stmt, gimple_vuse (stmt));
              gimple_set_vdef (new_stmt, gimple_vdef (stmt));
            }
          break;
        }
    case AARCH64_SIMD_BUILTIN_LANE_CHECK:
      if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
        {
          unlink_stmt_vdef (stmt);
          release_defs (stmt);
          new_stmt = gimple_build_nop ();
        }
      break;
    default:
      break;
    }
  return new_stmt;
}

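/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV: build the trees that save,
   mask and restore the floating-point environment (FPCR/FPSR) around a
   C11 atomic compound assignment, re-raising any newly-set exceptions
   via __atomic_feraiseexcept.  */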
void
aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  const unsigned AARCH64_FE_INVALID = 1;
  const unsigned AARCH64_FE_DIVBYZERO = 2;
  const unsigned AARCH64_FE_OVERFLOW = 4;
  const unsigned AARCH64_FE_UNDERFLOW = 8;
  const unsigned AARCH64_FE_INEXACT = 16;
  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
                                                        | AARCH64_FE_DIVBYZERO
                                                        | AARCH64_FE_OVERFLOW
                                                        | AARCH64_FE_UNDERFLOW
                                                        | AARCH64_FE_INEXACT);
  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv;
  tree get_fpsr, set_fpsr;
  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;

  /* Generate the equivalent of:
       unsigned int fenv_cr;
       fenv_cr = __builtin_aarch64_get_fpcr ();

       unsigned int fenv_sr;
       fenv_sr = __builtin_aarch64_get_fpsr ();

     Now set all exceptions to non-stop:
       unsigned int mask_cr
         = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
       unsigned int masked_cr;
       masked_cr = fenv_cr & mask_cr;

     And clear all exception flags:
       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
       unsigned int masked_sr;
       masked_sr = fenv_sr & mask_sr;

       __builtin_aarch64_set_fpcr (masked_cr);
       __builtin_aarch64_set_fpsr (masked_sr);  */

  fenv_cr = create_tmp_var_raw (unsigned_type_node);
  fenv_sr = create_tmp_var_raw (unsigned_type_node);

  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];

  mask_cr = build_int_cst (unsigned_type_node,
                           ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
  mask_sr = build_int_cst (unsigned_type_node,
                           ~(AARCH64_FE_ALL_EXCEPT));

  ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node,
                       fenv_cr, build_call_expr (get_fpcr, 0),
                       NULL_TREE, NULL_TREE);
  ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node,
                       fenv_sr, build_call_expr (get_fpsr, 0),
                       NULL_TREE, NULL_TREE);

  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);

  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
                        hold_fnclex_sr);
  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
                        masked_fenv_sr);
  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);

  *hold = build2 (COMPOUND_EXPR, void_type_node,
                  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
                  hold_fnclex);

  /* Store the value of masked_fenv to clear the exceptions:
       __builtin_aarch64_set_fpsr (masked_fenv_sr);  */

  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  /* Generate the equivalent of:
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_aarch64_get_fpsr ();

       __builtin_aarch64_set_fpsr (fenv_sr);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build4 (TARGET_EXPR, unsigned_type_node,
                        new_fenv_var, build_call_expr (get_fpsr, 0),
                        NULL_TREE, NULL_TREE);
  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
                                 fold_convert (integer_type_node, new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
                    build2 (COMPOUND_EXPR, void_type_node,
                            reload_fenv, restore_fnenv), update_call);
}

/* Resolve overloaded MEMTAG built-in functions.  */
#define AARCH64_BUILTIN_SUBCODE(F) \
  (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)

static tree
aarch64_resolve_overloaded_memtag (location_t loc,
                                   tree fndecl, void *pass_params)
{
  vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
  unsigned param_num = params ? params->length () : 0;
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
  tree inittype = aarch64_memtag_builtin_data[
                    fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
  unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;

  if (param_num != arg_num)
    {
      TREE_TYPE (fndecl) = inittype;
      return NULL_TREE;
    }
  tree retype = NULL;

  if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
    {
      tree t0 = TREE_TYPE ((*params)[0]);
      tree t1 = TREE_TYPE ((*params)[1]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
        t0 = ptr_type_node;
      if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
        t1 = ptr_type_node;

      if (TYPE_MODE (t0) != DImode)
        warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
                    (int) tree_to_shwi (DECL_SIZE ((*params)[0])));

      if (TYPE_MODE (t1) != DImode)
        warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
                    (int) tree_to_shwi (DECL_SIZE ((*params)[1])));

      retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
    }
  else
    {
      tree t0 = TREE_TYPE ((*params)[0]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
        {
          TREE_TYPE (fndecl) = inittype;
          return NULL_TREE;
        }

      if (TYPE_MODE (t0) != DImode)
        warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
                    (int) tree_to_shwi (DECL_SIZE ((*params)[0])));

      switch (fcode)
        {
        case AARCH64_MEMTAG_BUILTIN_IRG:
          retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
          break;
        case AARCH64_MEMTAG_BUILTIN_GMI:
          retype = build_function_type_list (uint64_type_node, t0,
                                             uint64_type_node, NULL);
          break;
        case AARCH64_MEMTAG_BUILTIN_INC_TAG:
          retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
          break;
        case AARCH64_MEMTAG_BUILTIN_SET_TAG:
          retype = build_function_type_list (void_type_node, t0, NULL);
          break;
        case AARCH64_MEMTAG_BUILTIN_GET_TAG:
          retype = build_function_type_list (t0, t0, NULL);
          break;
        default:
          return NULL_TREE;
        }
    }

  if (!retype || retype == error_mark_node)
    TREE_TYPE (fndecl) = inittype;
  else
    TREE_TYPE (fndecl) = retype;

  return NULL_TREE;
}

/* Called from aarch64_resolve_overloaded_builtin in aarch64-c.cc.  */
tree
aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
                                            void *pass_params)
{
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function);

  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_resolve_overloaded_memtag (loc, function, pass_params);

  return NULL_TREE;
}

#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef CF0
#undef CF1
#undef CF2
#undef CF3
#undef CF4
#undef CF10
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10
#undef VAR11

#include "gt-aarch64-builtins.h"