gcc/gcc/config/aarch64/aarch64-builtins.cc

/* Builtins' description for AArch64 SIMD architecture.
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "gimple-iterator.h"
#include "case-cfn-macros.h"
#include "emit-rtl.h"
#include "stringpool.h"
#include "attribs.h"
#include "gimple-fold.h"

/* Lower-case mode suffixes mapped onto machine_mode enumerators.  UP (X)
   pastes "_UP" onto X, so UP (v8qi) selects E_V8QImode.  The table-building
   definition of VAR1 uses UP to fill in the mode field of each entry.  */
#define v8qi_UP  E_V8QImode
#define v8di_UP  E_V8DImode
#define v4hi_UP  E_V4HImode
#define v4hf_UP  E_V4HFmode
#define v2si_UP  E_V2SImode
#define v2sf_UP  E_V2SFmode
#define v1df_UP  E_V1DFmode
#define di_UP    E_DImode
#define df_UP    E_DFmode
#define v16qi_UP E_V16QImode
#define v8hi_UP  E_V8HImode
#define v8hf_UP  E_V8HFmode
#define v4si_UP  E_V4SImode
#define v4sf_UP  E_V4SFmode
#define v2di_UP  E_V2DImode
#define v2df_UP  E_V2DFmode
#define ti_UP	 E_TImode
#define oi_UP	 E_OImode
#define ci_UP	 E_CImode
#define xi_UP	 E_XImode
#define si_UP    E_SImode
#define sf_UP    E_SFmode
#define hi_UP    E_HImode
#define hf_UP    E_HFmode
#define qi_UP    E_QImode
#define bf_UP    E_BFmode
#define v4bf_UP  E_V4BFmode
#define v8bf_UP  E_V8BFmode
/* Suffixes of the form vNx<elts><elt-mode> map to the E_VNx... modes;
   presumably these are the N-vector tuple modes -- confirm against the
   mode definitions.  */
#define v2x8qi_UP  E_V2x8QImode
#define v2x4hi_UP  E_V2x4HImode
#define v2x4hf_UP  E_V2x4HFmode
#define v2x4bf_UP  E_V2x4BFmode
#define v2x2si_UP  E_V2x2SImode
#define v2x2sf_UP  E_V2x2SFmode
#define v2x1di_UP  E_V2x1DImode
#define v2x1df_UP  E_V2x1DFmode
#define v2x16qi_UP E_V2x16QImode
#define v2x8hi_UP  E_V2x8HImode
#define v2x8hf_UP  E_V2x8HFmode
#define v2x8bf_UP  E_V2x8BFmode
#define v2x4si_UP  E_V2x4SImode
#define v2x4sf_UP  E_V2x4SFmode
#define v2x2di_UP  E_V2x2DImode
#define v2x2df_UP  E_V2x2DFmode
#define v3x8qi_UP  E_V3x8QImode
#define v3x4hi_UP  E_V3x4HImode
#define v3x4hf_UP  E_V3x4HFmode
#define v3x4bf_UP  E_V3x4BFmode
#define v3x2si_UP  E_V3x2SImode
#define v3x2sf_UP  E_V3x2SFmode
#define v3x1di_UP  E_V3x1DImode
#define v3x1df_UP  E_V3x1DFmode
#define v3x16qi_UP E_V3x16QImode
#define v3x8hi_UP  E_V3x8HImode
#define v3x8hf_UP  E_V3x8HFmode
#define v3x8bf_UP  E_V3x8BFmode
#define v3x4si_UP  E_V3x4SImode
#define v3x4sf_UP  E_V3x4SFmode
#define v3x2di_UP  E_V3x2DImode
#define v3x2df_UP  E_V3x2DFmode
#define v4x8qi_UP  E_V4x8QImode
#define v4x4hi_UP  E_V4x4HImode
#define v4x4hf_UP  E_V4x4HFmode
#define v4x4bf_UP  E_V4x4BFmode
#define v4x2si_UP  E_V4x2SImode
#define v4x2sf_UP  E_V4x2SFmode
#define v4x1di_UP  E_V4x1DImode
#define v4x1df_UP  E_V4x1DFmode
#define v4x16qi_UP E_V4x16QImode
#define v4x8hi_UP  E_V4x8HImode
#define v4x8hf_UP  E_V4x8HFmode
#define v4x8bf_UP  E_V4x8BFmode
#define v4x4si_UP  E_V4x4SImode
#define v4x4sf_UP  E_V4x4SFmode
#define v4x2di_UP  E_V4x2DImode
#define v4x2df_UP  E_V4x2DFmode
/* Select the machine mode for the lower-case suffix X.  */
#define UP(X) X##_UP

/* Length of the aarch64_types_*_qualifiers arrays.  */
#define SIMD_MAX_BUILTIN_ARGS 5

/* Bit-mask qualifiers describing one slot (return value or operand) of a
   builtin's signature.  Each basic qualifier occupies its own bit; the two
   *_map_mode composites are the bitwise OR of the basic qualifiers named
   in their identifiers.  */
enum aarch64_type_qualifiers
{
  /* T foo.  */
  qualifier_none = 0,
  /* unsigned T foo.  */
  qualifier_unsigned = 1 << 0, /* 0x1  */
  /* const T foo.  */
  qualifier_const = 1 << 1, /* 0x2  */
  /* T *foo.  */
  qualifier_pointer = 1 << 2, /* 0x4  */
  /* Used when expanding arguments if an operand could
     be an immediate.  */
  qualifier_immediate = 1 << 3, /* 0x8  */
  qualifier_maybe_immediate = 1 << 4, /* 0x10  */
  /* void foo (...).  */
  qualifier_void = 1 << 5, /* 0x20  */
  /* Some patterns may have internal operands; this qualifier tells the
     initialisation code to skip such an operand.  */
  qualifier_internal = 1 << 6, /* 0x40  */
  /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
     rather than using the type of the operand.  */
  qualifier_map_mode = 1 << 7, /* 0x80  */
  /* Pointer whose pointee comes from the datum's mode (0x84).  */
  qualifier_pointer_map_mode = qualifier_pointer | qualifier_map_mode,
  /* As above, but pointing to const (0x86).  */
  qualifier_const_pointer_map_mode
    = qualifier_const | qualifier_pointer | qualifier_map_mode,
  /* Polynomial types.  */
  qualifier_poly = 1 << 8, /* 0x100  */
  /* Lane indices - must be in range, and flipped for bigendian.  */
  qualifier_lane_index = 1 << 9, /* 0x200  */
  /* Lane indices for single lane structure loads and stores.  */
  qualifier_struct_load_store_lane_index = 1 << 10, /* 0x400  */
  /* Lane indices selected in pairs - must be in range, and flipped for
     bigendian.  */
  qualifier_lane_pair_index = 1 << 11, /* 0x800  */
  /* Lane indices selected in quadtuplets - must be in range, and flipped
     for bigendian.  */
  qualifier_lane_quadtup_index = 1 << 12 /* 0x1000  */
};

/* Bit flags that describe what a builtin function might do.  */
const unsigned int FLAG_NONE = 0x00U;
const unsigned int FLAG_READ_FPCR = 0x01U;
const unsigned int FLAG_RAISE_FP_EXCEPTIONS = 0x02U;
const unsigned int FLAG_READ_MEMORY = 0x04U;
const unsigned int FLAG_PREFETCH_MEMORY = 0x08U;
const unsigned int FLAG_WRITE_MEMORY = 0x10U;

/* Not every FP intrinsic raises FP exceptions or reads the FPCR register;
   this flag is used to suppress that assumption.  */
const unsigned int FLAG_AUTO_FP = 0x20U;

/* Convenience combinations.  FLAG_ALL covers the FP and memory effects
   but deliberately leaves FLAG_AUTO_FP clear.  */
const unsigned int FLAG_FP = FLAG_RAISE_FP_EXCEPTIONS | FLAG_READ_FPCR;
const unsigned int FLAG_ALL
  = FLAG_FP | FLAG_READ_MEMORY | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY;
const unsigned int FLAG_STORE = FLAG_AUTO_FP | FLAG_WRITE_MEMORY;
const unsigned int FLAG_LOAD = FLAG_AUTO_FP | FLAG_READ_MEMORY;

/* One entry of the SIMD builtin table.  Entries are produced by the
   table-building definition of VAR1.  */
typedef struct
{
  /* Builtin name: the stringified N and A arguments of VAR1, joined.  */
  const char *name;
  /* Machine mode selected by UP (A).  */
  machine_mode mode;
  /* Instruction pattern selected by one of the CF* macros.  */
  const enum insn_code code;
  /* Function code; VAR1 initialises it to 0.  */
  unsigned int fcode;
  /* One of the aarch64_types_*_qualifiers arrays (via a TYPES_* alias).  */
  enum aarch64_type_qualifiers *qualifiers;
  /* Bitwise OR of FLAG_* values.  */
  unsigned int flags;
} aarch64_simd_builtin_datum;

/* Qualifier arrays for one- and two-operand builtins.  Each array has
   SIMD_MAX_BUILTIN_ARGS slots; slots without an initialiser are zero, i.e.
   qualifier_none.  Going by the store comment later in this file, slot 0
   describes the return type and the following slots the operands.  The
   TYPES_* macros are the names pasted together by VAR1 (TYPES_##T).  */
static enum aarch64_type_qualifiers
aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none };
#define TYPES_UNOP (aarch64_types_unop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned };
#define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none };
#define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_unsigned };
#define TYPES_BINOP_PPU (aarch64_types_binop_ppu_qualifiers)

/* Qualifier arrays for three-operand builtins (four slots used, counting
   slot 0).  The suffix letters in the names mirror the per-slot
   qualifiers: s = qualifier_none, u = qualifier_unsigned,
   p = qualifier_poly.  */
static enum aarch64_type_qualifiers
aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
#define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_TERNOPU_LANE (aarch64_types_ternopu_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_sssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_TERNOP_SSSU (aarch64_types_ternop_sssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers)
/* NOTE(review): the array is named "binop" but has four slots and is
   exported as TYPES_TERNOP_PPPU; the name looks like a leftover.  */
static enum aarch64_type_qualifiers
aarch64_types_binop_pppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_unsigned };
#define TYPES_TERNOP_PPPU (aarch64_types_binop_pppu_qualifiers)

/* Qualifier arrays for four-operand builtins.  These use all
   SIMD_MAX_BUILTIN_ARGS slots; the last slot is a lane index of some kind
   or an immediate.  */
static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_pair_index };
#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_index };
#define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)

/* Mixed-signedness variants whose final slot is a quadtuplet lane
   index.  */
static enum aarch64_type_qualifiers
aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSUS_LANE_QUADTUP \
	(aarch64_types_quadopssus_lane_quadtup_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_unsigned, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSSU_LANE_QUADTUP \
	(aarch64_types_quadopsssu_lane_quadtup_qualifiers)

/* All-unsigned variant whose final slot is an immediate.  */
static enum aarch64_type_qualifiers
aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)

/* Qualifier arrays whose last used slot is qualifier_immediate.  Several
   TYPES_* names alias the same array when their signatures coincide.  */
static enum aarch64_type_qualifiers
aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
#define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_immediate };
#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_immediate };
#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
/* Reuses the all-unsigned ternary-with-immediate array.  */
#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate };
#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)

/* Signed three-operand form with a trailing immediate, shared by four
   TYPES_* names.  */
static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)

/* Polynomial counterpart of the array above.  */
static enum aarch64_type_qualifiers
aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate};
#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)

/* Unsigned counterpart.  */
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_immediate };
#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)

/* Qualifier arrays for loads.  Slot 1 is
   qualifier_const_pointer_map_mode, i.e. a const pointer whose pointee is
   taken from the mode recorded in the builtin datum.  The plain, _u and
   _p variants differ only in using qualifier_none, qualifier_unsigned or
   qualifier_poly for the value slots.  */
static enum aarch64_type_qualifiers
aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_load1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1_U (aarch64_types_load1_u_qualifiers)
#define TYPES_LOADSTRUCT_U (aarch64_types_load1_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_load1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1_P (aarch64_types_load1_p_qualifiers)
#define TYPES_LOADSTRUCT_P (aarch64_types_load1_p_qualifiers)

/* Single-lane structure loads: pointer, an existing value, and a
   structure load/store lane index.  */
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_const_pointer_map_mode,
      qualifier_unsigned, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE_U (aarch64_types_loadstruct_lane_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_const_pointer_map_mode,
      qualifier_poly, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE_P (aarch64_types_loadstruct_lane_p_qualifiers)

/* Qualifier arrays for the BSL builtins.  In all three variants slot 1 is
   qualifier_unsigned (presumably the selection mask -- confirm against
   the .def users); the remaining slots are poly, signed or unsigned.  */
static enum aarch64_type_qualifiers
aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_unsigned,
      qualifier_poly, qualifier_poly };
#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_none };
#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)

/* The first argument (return type) of a store should be void type,
   which we represent with qualifier_void.  Their first operand will be
   a DImode pointer to the location to store to, so we must use
   qualifier_map_mode | qualifier_pointer to build a pointer to the
   element type of the vector.  The plain, _u and _p variants differ only
   in the qualifier of the stored value.  */
static enum aarch64_type_qualifiers
aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_unsigned };
#define TYPES_STORE1_U (aarch64_types_store1_u_qualifiers)
#define TYPES_STORESTRUCT_U (aarch64_types_store1_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
#define TYPES_STORE1_P (aarch64_types_store1_p_qualifiers)
#define TYPES_STORESTRUCT_P (aarch64_types_store1_p_qualifiers)

/* Single-lane structure stores: as above, with a trailing structure
   load/store lane index.  */
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_unsigned, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE_U (aarch64_types_storestruct_lane_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_poly, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE_P (aarch64_types_storestruct_lane_p_qualifiers)

/* Build an insn_code enumerator name from builtin name N and mode suffix
   X.  The MAP argument of VAR1 picks the variant (CF##MAP):
     CF0      CODE_FOR_aarch64_<N><X>
     CF1-CF4  CODE_FOR_<N><X><digit>
     CF10     CODE_FOR_<N><X>  */
#define CF0(N, X) CODE_FOR_aarch64_##N##X
#define CF1(N, X) CODE_FOR_##N##X##1
#define CF2(N, X) CODE_FOR_##N##X##2
#define CF3(N, X) CODE_FOR_##N##X##3
#define CF4(N, X) CODE_FOR_##N##X##4
#define CF10(N, X) CODE_FOR_##N##X

/* VAR1 expands to one aarch64_simd_builtin_datum initialiser followed by
   a comma: name is the stringified N and A joined, mode is UP (A), code is
   CF<MAP> (N, A), fcode is 0, qualifiers is TYPES_<T> and flags is
   FLAG_<FLAG>.
   VARn (n = 2..16) takes n mode suffixes and expands to VAR(n-1) for the
   first n-1 of them followed by VAR1 for the last, so the entries appear
   in argument order.  From VAR14 on, the builtin-name parameter is spelt
   X because N is needed as a mode-suffix parameter.  */
#define VAR1(T, N, MAP, FLAG, A) \
  {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T, FLAG_##FLAG},
#define VAR2(T, N, MAP, FLAG, A, B) \
  VAR1 (T, N, MAP, FLAG, A) \
  VAR1 (T, N, MAP, FLAG, B)
#define VAR3(T, N, MAP, FLAG, A, B, C) \
  VAR2 (T, N, MAP, FLAG, A, B) \
  VAR1 (T, N, MAP, FLAG, C)
#define VAR4(T, N, MAP, FLAG, A, B, C, D) \
  VAR3 (T, N, MAP, FLAG, A, B, C) \
  VAR1 (T, N, MAP, FLAG, D)
#define VAR5(T, N, MAP, FLAG, A, B, C, D, E) \
  VAR4 (T, N, MAP, FLAG, A, B, C, D) \
  VAR1 (T, N, MAP, FLAG, E)
#define VAR6(T, N, MAP, FLAG, A, B, C, D, E, F) \
  VAR5 (T, N, MAP, FLAG, A, B, C, D, E) \
  VAR1 (T, N, MAP, FLAG, F)
#define VAR7(T, N, MAP, FLAG, A, B, C, D, E, F, G) \
  VAR6 (T, N, MAP, FLAG, A, B, C, D, E, F) \
  VAR1 (T, N, MAP, FLAG, G)
#define VAR8(T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, MAP, FLAG, A, B, C, D, E, F, G) \
  VAR1 (T, N, MAP, FLAG, H)
#define VAR9(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
  VAR1 (T, N, MAP, FLAG, I)
#define VAR10(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
  VAR1 (T, N, MAP, FLAG, J)
#define VAR11(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
  VAR10 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
  VAR1 (T, N, MAP, FLAG, K)
#define VAR12(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR11 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
  VAR1 (T, N, MAP, FLAG, L)
#define VAR13(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR12 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR1 (T, N, MAP, FLAG, M)
#define VAR14(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR13 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR1 (T, X, MAP, FLAG, N)
#define VAR15(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
  VAR14 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR1 (T, X, MAP, FLAG, O)
#define VAR16(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
  VAR15 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
  VAR1 (T, X, MAP, FLAG, P)

#include "aarch64-builtin-iterators.h"

/* Table of SIMD builtins.  Its contents come from expanding
   aarch64-simd-builtins.def with the VAR* macros defined above; the same
   .def file is expanded again inside enum aarch64_builtins, and
   ARRAY_SIZE of this table is used there to compute
   AARCH64_SIMD_BUILTIN_MAX.  */
static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
#include "aarch64-simd-builtins.def"
};

/* There are only 8 CRC32 builtins.  Probably not worth their own .def
   file.  Each use of this list must first define CRC32_BUILTIN (N, M),
   where N is the builtin name and M the upper-case mode name.  */
#define AARCH64_CRC32_BUILTINS \
  CRC32_BUILTIN (crc32b, QI) \
  CRC32_BUILTIN (crc32h, HI) \
  CRC32_BUILTIN (crc32w, SI) \
  CRC32_BUILTIN (crc32x, DI) \
  CRC32_BUILTIN (crc32cb, QI) \
  CRC32_BUILTIN (crc32ch, HI) \
  CRC32_BUILTIN (crc32cw, SI) \
  CRC32_BUILTIN (crc32cx, DI)

/* The next 8 FCMLA intrinsics require some special handling compared to
   the normal simd intrinsics.  Each use of this list must first define
   FCMLA_LANEQ_BUILTIN (I, N, X, M, T): I is 0, 90, 180 or 270 (presumably
   the rotation in degrees), N the lower-case mode suffix, X the pattern
   name stem, M the upper-case mode name and T the value stored in the
   datum's "lane" field.  */
#define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
  FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \

/* One entry of the CRC32 builtin table: builtin name, operand mode,
   instruction pattern and function code (an enum aarch64_builtins
   value).  */
typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
} aarch64_crc_builtin_datum;

/* Hold information about how to expand the FCMLA_LANEQ builtins.  The
   fields match aarch64_crc_builtin_datum, plus LANE, which is taken from
   the last argument of FCMLA_LANEQ_BUILTIN.  */
typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
  bool lane;
} aarch64_fcmla_laneq_builtin_datum;

/* Definitions used while declaring enum aarch64_builtins: each list entry
   expands to a single enumerator name followed by a comma.  */
#define CRC32_BUILTIN(N, M) \
  AARCH64_BUILTIN_##N,

#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,

/* Replace the table-building VAR1 so that re-including
   aarch64-simd-builtins.def yields enumerators instead of
   initialisers.  */
#undef VAR1
#define VAR1(T, N, MAP, FLAG, A) \
  AARCH64_SIMD_BUILTIN_##T##_##N##A,

/* Function codes for the general AArch64 builtins.  The order of the
   enumerators matters: several ranges are delimited by *_BASE, *_START,
   *_END and *_MAX markers, and the SIMD range is sized from
   aarch64_simd_builtin_data.  */
enum aarch64_builtins
{
  AARCH64_BUILTIN_MIN,

  AARCH64_BUILTIN_GET_FPCR,
  AARCH64_BUILTIN_SET_FPCR,
  AARCH64_BUILTIN_GET_FPSR,
  AARCH64_BUILTIN_SET_FPSR,

  AARCH64_BUILTIN_GET_FPCR64,
  AARCH64_BUILTIN_SET_FPCR64,
  AARCH64_BUILTIN_GET_FPSR64,
  AARCH64_BUILTIN_SET_FPSR64,

  AARCH64_BUILTIN_RSQRT_DF,
  AARCH64_BUILTIN_RSQRT_SF,
  AARCH64_BUILTIN_RSQRT_V2DF,
  AARCH64_BUILTIN_RSQRT_V2SF,
  AARCH64_BUILTIN_RSQRT_V4SF,
  AARCH64_SIMD_BUILTIN_BASE,
  AARCH64_SIMD_BUILTIN_LANE_CHECK,
  /* One enumerator per VAR1 expansion in the .def file.  */
#include "aarch64-simd-builtins.def"
  /* The first enum element which is based on an insn_data pattern.  The
     explicit value rewinds the counter to the first .def enumerator.  */
  AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
  /* Code of the last table entry; later enumerators continue from here.  */
  AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
			      + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
  AARCH64_CRC32_BUILTIN_BASE,
  AARCH64_CRC32_BUILTINS
  AARCH64_CRC32_BUILTIN_MAX,
  /* ARMv8.3-A Pointer Authentication Builtins.  */
  AARCH64_PAUTH_BUILTIN_AUTIA1716,
  AARCH64_PAUTH_BUILTIN_PACIA1716,
  AARCH64_PAUTH_BUILTIN_AUTIB1716,
  AARCH64_PAUTH_BUILTIN_PACIB1716,
  AARCH64_PAUTH_BUILTIN_XPACLRI,
  /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins.  */
  AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
  /* Builtin for Arm8.3-a Javascript conversion instruction.  */
  AARCH64_JSCVT,
  /* TME builtins.  */
  AARCH64_TME_BUILTIN_TSTART,
  AARCH64_TME_BUILTIN_TCOMMIT,
  AARCH64_TME_BUILTIN_TTEST,
  AARCH64_TME_BUILTIN_TCANCEL,
  /* Armv8.5-a RNG instruction builtins.  */
  AARCH64_BUILTIN_RNG_RNDR,
  AARCH64_BUILTIN_RNG_RNDRRS,
  /* MEMTAG builtins.  */
  AARCH64_MEMTAG_BUILTIN_START,
  AARCH64_MEMTAG_BUILTIN_IRG,
  AARCH64_MEMTAG_BUILTIN_GMI,
  AARCH64_MEMTAG_BUILTIN_SUBP,
  AARCH64_MEMTAG_BUILTIN_INC_TAG,
  AARCH64_MEMTAG_BUILTIN_SET_TAG,
  AARCH64_MEMTAG_BUILTIN_GET_TAG,
  AARCH64_MEMTAG_BUILTIN_END,
  /* LS64 builtins.  */
  AARCH64_LS64_BUILTIN_LD64B,
  AARCH64_LS64_BUILTIN_ST64B,
  AARCH64_LS64_BUILTIN_ST64BV,
  AARCH64_LS64_BUILTIN_ST64BV0,
  /* Number of codes; used to size aarch64_builtin_decls.  */
  AARCH64_BUILTIN_MAX
};

/* Redefine CRC32_BUILTIN so the list now yields table initialisers:
   "__builtin_aarch64_<N>", E_<M>mode, CODE_FOR_aarch64_<N> and the
   matching enumerator.  */
#undef CRC32_BUILTIN
#define CRC32_BUILTIN(N, M) \
  {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},

static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
  AARCH64_CRC32_BUILTINS
};


/* Likewise for FCMLA_LANEQ_BUILTIN: the name is
   "__builtin_aarch64_fcmla_laneq<I><N>", the pattern is
   CODE_FOR_aarch64_<X><I><N>, and T initialises the lane field.  */
#undef FCMLA_LANEQ_BUILTIN
#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
   AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},

/* This structure describes the mapping from the builtin to the
   instruction to generate in the backend and how to invoke the
   instruction.  */
static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
};

#undef CRC32_BUILTIN

/* One tree slot per enum aarch64_builtins value (presumably the builtin's
   function decl -- the code that fills it in is outside this excerpt).  */
static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];

/* NOTE(review): no use of these two counts is visible in this excerpt.  */
#define NUM_DREG_TYPES 6
#define NUM_QREG_TYPES 6

/* Internal scalar builtin types.  These types are used to support
   neon intrinsic builtins.  They are _not_ user-visible types.  Therefore
   the mangling for these types is implementation defined.
   The list is NULL-terminated; aarch64_mangle_builtin_scalar_type walks
   it until it reaches the NULL entry.  */
const char *aarch64_scalar_builtin_types[] = {
  "__builtin_aarch64_simd_qi",
  "__builtin_aarch64_simd_hi",
  "__builtin_aarch64_simd_si",
  "__builtin_aarch64_simd_hf",
  "__builtin_aarch64_simd_sf",
  "__builtin_aarch64_simd_di",
  "__builtin_aarch64_simd_df",
  "__builtin_aarch64_simd_poly8",
  "__builtin_aarch64_simd_poly16",
  "__builtin_aarch64_simd_poly64",
  "__builtin_aarch64_simd_poly128",
  "__builtin_aarch64_simd_ti",
  "__builtin_aarch64_simd_uqi",
  "__builtin_aarch64_simd_uhi",
  "__builtin_aarch64_simd_usi",
  "__builtin_aarch64_simd_udi",
  "__builtin_aarch64_simd_ei",
  "__builtin_aarch64_simd_oi",
  "__builtin_aarch64_simd_ci",
  "__builtin_aarch64_simd_xi",
  "__builtin_aarch64_simd_bf",
  NULL
};

/* Index of each AdvSIMD type.  ENTRY keeps only its first argument here,
   so there is one enumerator per line of the .def file, in file order;
   ARM_NEON_H_TYPES_LAST is therefore the number of types.  */
#define ENTRY(E, M, Q, G) E,
enum aarch64_simd_type
{
#include "aarch64-simd-builtin-types.def"
  ARM_NEON_H_TYPES_LAST
};
#undef ENTRY

/* Description of one AdvSIMD type; aarch64_simd_types holds one of these
   per enum aarch64_simd_type value.  */
struct GTY(()) aarch64_simd_type_info
{
  /* The type's own index in enum aarch64_simd_type.  */
  enum aarch64_simd_type type;

  /* Internal type name.  */
  const char *name;

  /* Internal type name(mangled).  The mangled names conform to the
     AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
     Appendix A).  To qualify for emission with the mangled names defined in
     that document, a vector type must not only be of the correct mode but also
     be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
     types are registered by aarch64_init_simd_builtin_types ().  In other
     words, vector types defined in other ways e.g. via vector_size attribute
     will get default mangled names.  */
  const char *mangle;

  /* Internal type.  Statically NULL_TREE; filled in during
     initialisation.  */
  tree itype;

  /* Element type.  Statically NULL_TREE; filled in during
     initialisation.  */
  tree eltype;

  /* Machine mode the internal type maps to.  */
  enum machine_mode mode;

  /* Qualifiers.  */
  enum aarch64_type_qualifiers q;
};

/* Static part of the type table.  For ENTRY (E, M, Q, G) the name is
   "__E", the mangled name is G stringified followed by "__E", both tree
   fields start as NULL_TREE, the mode is E_<M>mode and the qualifier is
   qualifier_<Q>.  Entries are in the same order as
   enum aarch64_simd_type.  */
#define ENTRY(E, M, Q, G)  \
  {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
static GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = {
#include "aarch64-simd-builtin-types.def"
};
#undef ENTRY

/* Per-type tuple information, indexed by enum aarch64_simd_type and then
   by a slot in 0..2 (presumably tuples of 2, 3 and 4 vectors -- confirm
   against the code that fills these in).  aarch64_lookup_simd_builtin_type
   treats a NULL_TREE in slot 0 as "no tuple types for this entry".  */
static machine_mode aarch64_simd_tuple_modes[ARM_NEON_H_TYPES_LAST][3];
static GTY(()) tree aarch64_simd_tuple_types[ARM_NEON_H_TYPES_LAST][3];

/* Type nodes returned by aarch64_simd_builtin_std_type for OImode, CImode
   and XImode respectively.  */
static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;

/* The user-visible __fp16 type, and a pointer to that type.  Used
   across the back-end.  */
tree aarch64_fp16_type_node = NULL_TREE;
tree aarch64_fp16_ptr_type_node = NULL_TREE;

/* Back-end node type for brain float (bfloat) types.  */
tree aarch64_bf16_type_node = NULL_TREE;
tree aarch64_bf16_ptr_type_node = NULL_TREE;

/* Register a general AArch64 builtin through add_builtin_function.
   NAME is the builtin's name, TYPE its function type and ATTRS its
   attribute list (none by default).  CODE is the subcode relative to
   AARCH64_BUILTIN_GENERAL; it is shifted left by AARCH64_BUILTIN_SHIFT
   and combined with AARCH64_BUILTIN_GENERAL to form the code actually
   registered.  Returns whatever add_builtin_function returns.  */
static tree
aarch64_general_add_builtin (const char *name, tree type, unsigned int code,
			     tree attrs = NULL_TREE)
{
  const unsigned int full_code
    = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;

  return add_builtin_function (name, type, full_code, BUILT_IN_MD,
			       NULL, attrs);
}

/* If TYPE is named by a TYPE_DECL whose identifier matches one of the
   entries in aarch64_scalar_builtin_types, return that entry (which is
   also used as the mangled name).  Return NULL otherwise, including when
   TYPE has no name at all.  */
static const char *
aarch64_mangle_builtin_scalar_type (const_tree type)
{
  /* The name of TYPE does not change while scanning the table, so look
     it up once.  An unnamed type cannot match any entry; checking for a
     null TYPE_NAME here also avoids handing a null tree to TREE_CODE.  */
  const_tree type_name = TYPE_NAME (type);
  if (!type_name
      || TREE_CODE (type_name) != TYPE_DECL
      || !DECL_NAME (type_name))
    return NULL;

  const char *id = IDENTIFIER_POINTER (DECL_NAME (type_name));

  /* The table is terminated by a NULL entry.  */
  for (int i = 0; aarch64_scalar_builtin_types[i] != NULL; i++)
    if (!strcmp (id, aarch64_scalar_builtin_types[i]))
      return aarch64_scalar_builtin_types[i];

  return NULL;
}

/* Return the mangled name recorded on TYPE by its "Advanced SIMD type"
   attribute, or NULL if TYPE does not carry that attribute.  The name is
   the identifier held in the first element of the attribute's argument
   list.  */
static const char *
aarch64_mangle_builtin_vector_type (const_tree type)
{
  tree attr = lookup_attribute ("Advanced SIMD type", TYPE_ATTRIBUTES (type));
  if (!attr)
    return NULL;

  return IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr)));
}

/* Return the mangled name of TYPE if it is one of the AArch64 builtin
   types, otherwise NULL.  The vector-type lookup is tried first and the
   scalar-type table second.  */
const char *
aarch64_general_mangle_builtin_type (const_tree type)
{
  if (const char *vector_name = aarch64_mangle_builtin_vector_type (type))
    return vector_name;

  /* The scalar lookup already yields NULL when nothing matches.  */
  return aarch64_mangle_builtin_scalar_type (type);
}

/* Return the standard scalar type node for MODE.  For the integer modes
   QI, HI, SI, DI and TI the signed node is returned when Q is
   qualifier_none and the unsigned node for any other Q.  OI, CI and XI
   map to the file-local wide integer nodes, and HF, SF, DF and BF to the
   corresponding floating-point nodes, regardless of Q.  Any other mode
   reaches gcc_unreachable.  */
static tree
aarch64_simd_builtin_std_type (machine_mode mode,
			       enum aarch64_type_qualifiers q)
{
/* Deliberately no trailing semicolon: the macro is an expression and the
   statement that uses it supplies the terminator.  */
#define QUAL_TYPE(M)  \
  ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node)
  switch (mode)
    {
    case E_QImode:
      return QUAL_TYPE (QI);
    case E_HImode:
      return QUAL_TYPE (HI);
    case E_SImode:
      return QUAL_TYPE (SI);
    case E_DImode:
      return QUAL_TYPE (DI);
    case E_TImode:
      return QUAL_TYPE (TI);
    case E_OImode:
      return aarch64_simd_intOI_type_node;
    case E_CImode:
      return aarch64_simd_intCI_type_node;
    case E_XImode:
      return aarch64_simd_intXI_type_node;
    case E_HFmode:
      return aarch64_fp16_type_node;
    case E_SFmode:
      return float_type_node;
    case E_DFmode:
      return double_type_node;
    case E_BFmode:
      return aarch64_bf16_type_node;
    default:
      gcc_unreachable ();
    }
#undef QUAL_TYPE
}

/* Return the builtin type whose machine mode is MODE and whose qualifier
   is Q.  Non-vector modes with a non-poly qualifier are answered by
   aarch64_simd_builtin_std_type.  Everything else is looked up in
   aarch64_simd_types, checking each entry's own mode first and then its
   three tuple modes.  Returns NULL_TREE if nothing matches.  */
static tree
aarch64_lookup_simd_builtin_type (machine_mode mode,
				  enum aarch64_type_qualifiers q)
{
  /* Non-poly scalar modes map to standard types not in the table.  */
  if (!VECTOR_MODE_P (mode) && q != qualifier_poly)
    return aarch64_simd_builtin_std_type (mode, q);

  const int num_types
    = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  for (int idx = 0; idx < num_types; idx++)
    {
      /* An entry and its tuple variants share a single qualifier, so a
	 qualifier mismatch rules out the whole entry.  */
      if (aarch64_simd_types[idx].q != q)
	continue;

      if (aarch64_simd_types[idx].mode == mode)
	return aarch64_simd_types[idx].itype;

      /* A null first slot means no tuple types exist for this entry.  */
      if (aarch64_simd_tuple_types[idx][0] == NULL_TREE)
	continue;

      for (int slot = 0; slot < 3; slot++)
	if (aarch64_simd_tuple_modes[idx][slot] == mode)
	  return aarch64_simd_tuple_types[idx][slot];
    }

  return NULL_TREE;
}

/* Look up the builtin type for MODE, translating the UNSIGNED_P and POLY_P
   booleans into a type qualifier.  POLY_P takes precedence over
   UNSIGNED_P.  */
static tree
aarch64_simd_builtin_type (machine_mode mode,
			   bool unsigned_p, bool poly_p)
{
  enum aarch64_type_qualifiers q = qualifier_none;

  if (poly_p)
    q = qualifier_poly;
  else if (unsigned_p)
    q = qualifier_unsigned;

  return aarch64_lookup_simd_builtin_type (mode, q);
}
 
/* Fill in the element type of every entry in aarch64_simd_types, build
   the internal type for each entry that does not already have one, and
   register each entry under its name with add_builtin_type.  Finally
   create and register the signed integer types used for OImode, CImode
   and XImode.  */
static void
aarch64_init_simd_builtin_types (void)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
  tree tdecl;

  /* Init all the element types built by the front-end.  */
  aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
  aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
  aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
  aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
  aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
  aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
  aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
  aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
  aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
  aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;

  /* Poly types are a world of their own.  Each scalar poly entry gets a
     distinct copy of the matching unsigned integer type as both its
     element type and its internal type.  */
  aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype =
    build_distinct_type_copy (unsigned_intQI_type_node);
  /* Prevent front-ends from transforming Poly8_t arrays into string
     literals.  */
  TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false;

  aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype =
    build_distinct_type_copy (unsigned_intHI_type_node);
  aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype =
    build_distinct_type_copy (unsigned_intDI_type_node);
  aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype =
    build_distinct_type_copy (unsigned_intTI_type_node);
  /* Init poly vector element types with scalar poly types.  */
  aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
  aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;

  /* Continue with standard types.  */
  aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float32x2_t].eltype = float_type_node;
  aarch64_simd_types[Float32x4_t].eltype = float_type_node;
  aarch64_simd_types[Float64x1_t].eltype = double_type_node;
  aarch64_simd_types[Float64x2_t].eltype = double_type_node;

  /* Init Bfloat vector types with underlying __bf16 type.  */
  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;

  for (i = 0; i < nelts; i++)
    {
      tree eltype = aarch64_simd_types[i].eltype;
      machine_mode mode = aarch64_simd_types[i].mode;

      /* Entries whose internal type was set above (the scalar poly
	 types) skip this; every other entry gets a distinct vector type
	 of ELTYPE with as many elements as MODE has units.  */
      if (aarch64_simd_types[i].itype == NULL)
	{
	  tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
	  type = build_distinct_type_copy (type);
	  SET_TYPE_STRUCTURAL_EQUALITY (type);

	  /* Attach the entry's mangled name to the type through an
	     "Advanced SIMD type" attribute.  */
	  tree mangled_name = get_identifier (aarch64_simd_types[i].mangle);
	  tree value = tree_cons (NULL_TREE, mangled_name, NULL_TREE);
	  TYPE_ATTRIBUTES (type)
	    = tree_cons (get_identifier ("Advanced SIMD type"), value,
			 TYPE_ATTRIBUTES (type));
	  aarch64_simd_types[i].itype = type;
	}

      /* Register the type under the entry's name and make the resulting
	 declaration the type's name.  */
      tdecl = add_builtin_type (aarch64_simd_types[i].name,
				aarch64_simd_types[i].itype);
      TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
    }

  /* Build a signed integer type as wide as each of the OI, CI and XI
     modes.  */
#define AARCH64_BUILD_SIGNED_TYPE(mode)  \
  make_signed_type (GET_MODE_PRECISION (mode));
  aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
  aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
  aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
#undef AARCH64_BUILD_SIGNED_TYPE

  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
  TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
  TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
  TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
}

static void
aarch64_init_simd_builtin_scalar_types (void)
{
  /* Define typedefs for all the standard scalar types.  */
  (*lang_hooks.types.register_builtin_type) (intQI_type_node,
					     "__builtin_aarch64_simd_qi");
  (*lang_hooks.types.register_builtin_type) (intHI_type_node,
					     "__builtin_aarch64_simd_hi");
  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
					     "__builtin_aarch64_simd_hf");
  (*lang_hooks.types.register_builtin_type) (intSI_type_node,
					     "__builtin_aarch64_simd_si");
  (*lang_hooks.types.register_builtin_type) (float_type_node,
					     "__builtin_aarch64_simd_sf");
  (*lang_hooks.types.register_builtin_type) (intDI_type_node,
					     "__builtin_aarch64_simd_di");
  (*lang_hooks.types.register_builtin_type) (double_type_node,
					     "__builtin_aarch64_simd_df");
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
					     "__builtin_aarch64_simd_poly8");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
					     "__builtin_aarch64_simd_poly16");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
					     "__builtin_aarch64_simd_poly64");
  (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
					     "__builtin_aarch64_simd_poly128");
  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_aarch64_simd_ti");
  (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node,
					     "__builtin_aarch64_simd_bf");
  /* Unsigned integer types for various mode sizes.  */
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
					     "__builtin_aarch64_simd_uqi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
					     "__builtin_aarch64_simd_uhi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
					     "__builtin_aarch64_simd_usi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
					     "__builtin_aarch64_simd_udi");
}

/* Derive from FLAGS the set of FLAG_* flags that describe what a function
   with result mode MODE could do, once the command-line options have been
   taken into account.  */
static unsigned int
aarch64_call_properties (unsigned int flags, machine_mode mode)
{
  /* A floating-point MODE implies FLAG_FP unless FLAG_AUTO_FP is set.  */
  const bool auto_fp_p = (flags & FLAG_AUTO_FP) != 0;
  if (FLOAT_MODE_P (mode) && !auto_fp_p)
    flags |= FLAG_FP;

  if (flag_trapping_math)
    return flags;

  /* With -fno-trapping-math, FP exceptions can be assumed not to be
     user-visible.  */
  return flags & ~FLAG_RAISE_FP_EXCEPTIONS;
}

/* Return true if a call to a function with flags F and mode MODE could
   modify some form of global state: raise an FP exception, prefetch
   memory or write memory.  */
static bool
aarch64_modifies_global_state_p (unsigned int f, machine_mode mode)
{
  const unsigned int props = aarch64_call_properties (f, mode);
  const unsigned int modifying
    = FLAG_RAISE_FP_EXCEPTIONS | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY;

  return (props & modifying) != 0;
}

/* Return true if a call to a function with flags F and mode MODE could
   read some form of global state: the FPCR or memory.  */
static bool
aarch64_reads_global_state_p (unsigned int f, machine_mode mode)
{
  const unsigned int props = aarch64_call_properties (f, mode);
  const unsigned int reading = FLAG_READ_FPCR | FLAG_READ_MEMORY;

  return (props & reading) != 0;
}

/* Return true if a call to a function with flags F and mode MODE could
   raise a signal, either through an FP exception or through a memory
   access.  */
static bool
aarch64_could_trap_p (unsigned int f, machine_mode mode)
{
  const unsigned int props = aarch64_call_properties (f, mode);
  const unsigned int trapping
    = FLAG_RAISE_FP_EXCEPTIONS | FLAG_READ_MEMORY | FLAG_WRITE_MEMORY;

  return (props & trapping) != 0;
}

/* Return attribute list ATTRS with an argument-less attribute called NAME
   added at the front.  */
static tree
aarch64_add_attribute (const char *name, tree attrs)
{
  tree attr_id = get_identifier (name);
  return tree_cons (attr_id, NULL_TREE, attrs);
}

/* Build the attribute list for a function that has flags F and mode MODE.
   "leaf" is always present; "pure" or "const" and "nothrow" are added when
   the function's properties allow them.  */
static tree
aarch64_get_attributes (unsigned int f, machine_mode mode)
{
  tree attrs = NULL_TREE;

  /* A function that leaves global state alone is "pure" if it reads
     global state and "const" if it does not.  */
  if (!aarch64_modifies_global_state_p (f, mode))
    {
      const char *purity
	= aarch64_reads_global_state_p (f, mode) ? "pure" : "const";
      attrs = aarch64_add_attribute (purity, attrs);
    }

  /* Only a trapping function under -fnon-call-exceptions can throw.  */
  const bool may_throw_p
    = flag_non_call_exceptions && aarch64_could_trap_p (f, mode);
  if (!may_throw_p)
    attrs = aarch64_add_attribute ("nothrow", attrs);

  return aarch64_add_attribute ("leaf", attrs);
}

/* True once aarch64_init_simd_builtins has started its work; later calls
   to that function return immediately.  */
static bool aarch64_simd_builtins_initialized_p = false;

/* Due to the architecture not providing lane variant of the lane instructions
   for fcmla we can't use the standard simd builtin expansion code, but we
   still want the majority of the validation that would normally be done.  */

void
aarch64_init_fcmla_laneq_builtins (void)
{
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
    {
      aarch64_fcmla_laneq_builtin_datum* d
	= &aarch64_fcmla_lane_builtin_data[i];
      tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none);
      machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
      tree quadtype
	= aarch64_lookup_simd_builtin_type (quadmode, qualifier_none);
      tree lanetype
	= aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index);
      tree ftype = build_function_type_list (argtype, argtype, argtype,
					     quadtype, lanetype, NULL_TREE);
      tree attrs = aarch64_get_attributes (FLAG_FP, d->mode);
      tree fndecl
	= aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}

/* Register the AdvSIMD builtins described by aarch64_simd_builtin_data.

   When CALLED_FROM_PRAGMA is false, register the lane-check builtin and
   every builtin whose insn pattern has no AdvSIMD structure-mode operand.
   When CALLED_FROM_PRAGMA is true, register only the builtins that have
   at least one such operand, using simulate_builtin_function_decl.

   Function codes are assigned consecutively from
   AARCH64_SIMD_PATTERN_START for every table entry, whether or not the
   entry is registered by this particular call.  */
void
aarch64_init_simd_builtin_functions (bool called_from_pragma)
{
  unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;

  if (!called_from_pragma)
    {
      tree lane_check_fpr = build_function_type_list (void_type_node,
						      size_type_node,
						      size_type_node,
						      intSI_type_node,
						      NULL);
      aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
	= aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
				       lane_check_fpr,
				       AARCH64_SIMD_BUILTIN_LANE_CHECK);
    }

  for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
    {
      bool print_type_signature_p = false;
      char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
      aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
      char namebuf[60];
      tree ftype = NULL;
      tree fndecl = NULL;

      d->fcode = fcode;

      /* We must track two variables here.  op_num is
	 the operand number as in the RTL pattern.  This is
	 required to access the mode (e.g. V4SF mode) of the
	 argument, from which the base type can be derived.
	 arg_num is an index in to the qualifiers data, which
	 gives qualifiers to the type (e.g. const unsigned).
	 The reason these two variables may differ by one is the
	 void return type.  While all return types take the 0th entry
	 in the qualifiers array, there is no operand for them in the
	 RTL pattern.  */
      int op_num = insn_data[d->code].n_operands - 1;
      int arg_num = d->qualifiers[0] & qualifier_void
		      ? op_num + 1
		      : op_num;
      tree return_type = void_type_node, args = void_list_node;
      tree eltype;

      /* Count the operands whose mode is an AdvSIMD structure mode.  */
      int struct_mode_args = 0;
      for (int j = op_num; j >= 0; j--)
	{
	  machine_mode op_mode = insn_data[d->code].operand[j].mode;
	  if (aarch64_advsimd_struct_mode_p (op_mode))
	    struct_mode_args++;
	}

      /* Builtins with structure-mode operands are registered only by the
	 pragma call; all other builtins only by the non-pragma call.  */
      if ((called_from_pragma && struct_mode_args == 0)
	  || (!called_from_pragma && struct_mode_args > 0))
	continue;

      /* Build a function type directly from the insn_data for this
	 builtin.  The build_function_type () function takes care of
	 removing duplicates for us.  */
      for (; op_num >= 0; arg_num--, op_num--)
	{
	  machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
	  enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];

	  /* Record one signature letter per operand: 'u' for unsigned,
	     'p' for poly and 's' otherwise.  The signature is appended to
	     the builtin's name only if some operand is unsigned or
	     poly.  */
	  if (qualifiers & qualifier_unsigned)
	    {
	      type_signature[op_num] = 'u';
	      print_type_signature_p = true;
	    }
	  else if (qualifiers & qualifier_poly)
	    {
	      type_signature[op_num] = 'p';
	      print_type_signature_p = true;
	    }
	  else
	    type_signature[op_num] = 's';

	  /* Skip an internal operand for vget_{low, high}.  */
	  if (qualifiers & qualifier_internal)
	    continue;

	  /* Some builtins have different user-facing types
	     for certain arguments, encoded in d->mode.  */
	  if (qualifiers & qualifier_map_mode)
	      op_mode = d->mode;

	  /* For pointers, we want a pointer to the basic type
	     of the vector.  */
	  if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
	    op_mode = GET_MODE_INNER (op_mode);

	  eltype = aarch64_simd_builtin_type
		     (op_mode,
		      (qualifiers & qualifier_unsigned) != 0,
		      (qualifiers & qualifier_poly) != 0);
	  gcc_assert (eltype != NULL);

	  /* Add qualifiers.  */
	  if (qualifiers & qualifier_const)
	    eltype = build_qualified_type (eltype, TYPE_QUAL_CONST);

	  if (qualifiers & qualifier_pointer)
	      eltype = build_pointer_type (eltype);

	  /* If we have reached arg_num == 0, we are at a non-void
	     return type.  Otherwise, we are still processing
	     arguments.  */
	  if (arg_num == 0)
	    return_type = eltype;
	  else
	    args = tree_cons (NULL_TREE, eltype, args);
	}

      ftype = build_function_type (return_type, args);

      gcc_assert (ftype != NULL);

      if (print_type_signature_p)
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
		  d->name, type_signature);
      else
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
		  d->name);

      tree attrs = aarch64_get_attributes (d->flags, d->mode);

      if (called_from_pragma)
	{
	  /* Combine the function code with the AARCH64_BUILTIN_GENERAL
	     tag to form the code passed to
	     simulate_builtin_function_decl.  */
	  unsigned int raw_code
		= (fcode << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
	  fndecl = simulate_builtin_function_decl (input_location, namebuf,
						   ftype, raw_code, NULL,
						   attrs);
	}
      else
	fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);

      aarch64_builtin_decls[fcode] = fndecl;
    }
}

/* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
   indexed by TYPE_INDEX.  The tuple is a record with a single field "val"
   that is an array of NUM_VECTORS vectors.  Its mode and type are stored
   in aarch64_simd_tuple_modes and aarch64_simd_tuple_types, in slot
   NUM_VECTORS - 2 of row TYPE_INDEX.  */
static void
register_tuple_type (unsigned int num_vectors, unsigned int type_index)
{
  aarch64_simd_type_info *type = &aarch64_simd_types[type_index];

  /* Synthesize the name of the user-visible vector tuple type.  The first
     two and the last two characters of the vector type name are dropped,
     "x<NUM_VECTORS>_t" is appended and the first character is
     lower-cased.
     NOTE(review): the buffer is sized for "bfloat16x4x2_t", presumably
     the longest tuple name; confirm against aarch64_simd_types.  */
  const char *vector_type_name = type->name;
  char tuple_type_name[sizeof ("bfloat16x4x2_t")];
  snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
	    (int) strlen (vector_type_name) - 4, vector_type_name + 2,
	    num_vectors);
  tuple_type_name[0] = TOLOWER (tuple_type_name[0]);

  tree vector_type = type->itype;
  tree array_type = build_array_type_nelts (vector_type, num_vectors);
  /* For DImode vectors, explicitly set the array type's mode to the
     Vnx1DI mode that matches NUM_VECTORS.  */
  if (type->mode == DImode)
    {
      if (num_vectors == 2)
	SET_TYPE_MODE (array_type, V2x1DImode);
      else if (num_vectors == 3)
	SET_TYPE_MODE (array_type, V3x1DImode);
      else if (num_vectors == 4)
	SET_TYPE_MODE (array_type, V4x1DImode);
    }

  /* Expect 128-bit alignment for 16-byte vector modes and 64-bit
     alignment otherwise.  */
  unsigned int alignment
    = known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64;
  machine_mode tuple_mode = TYPE_MODE_RAW (array_type);
  gcc_assert (VECTOR_MODE_P (tuple_mode)
	      && TYPE_MODE (array_type) == tuple_mode
	      && TYPE_ALIGN (array_type) == alignment);

  tree field = build_decl (input_location, FIELD_DECL,
			   get_identifier ("val"), array_type);

  tree t = lang_hooks.types.simulate_record_decl (input_location,
						  tuple_type_name,
						  make_array_slice (&field,
								    1));
  /* The record must have the array's mode and alignment, except when
     flag_pack_struct or maximum_field_alignment is in effect.  */
  gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
	      && (flag_pack_struct
		  || maximum_field_alignment
		  || (TYPE_MODE_RAW (t) == tuple_mode
		      && TYPE_ALIGN (t) == alignment)));

  aarch64_simd_tuple_modes[type_index][num_vectors - 2] = tuple_mode;
  aarch64_simd_tuple_types[type_index][num_vectors - 2] = t;
}

/* Return true if T is one of the scalar polynomial types.  */
static bool
aarch64_scalar_builtin_type_p (aarch64_simd_type t)
{
  switch (t)
    {
    case Poly8_t:
    case Poly16_t:
    case Poly64_t:
    case Poly128_t:
      return true;

    default:
      return false;
    }
}

/* Switch to the base Advanced SIMD ISA flags plus the AARCH64_FL_* flags
   in EXTRA_FLAGS, after saving the current ISA flags and the current
   general-registers-only setting.  */
aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags)
  : m_old_isa_flags (aarch64_isa_flags),
    m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
{
  /* Clear the general-registers-only restriction.  */
  global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;

  /* Overwriting the ISA flags should be sufficient; a full target switch
     would cost compile time that should not be needed here.  */
  aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags;
}

/* Restore the ISA flags and the general-registers-only setting that were
   saved by the constructor.  */
aarch64_simd_switcher::~aarch64_simd_switcher ()
{
  aarch64_isa_flags = m_old_isa_flags;

  /* The constructor cleared this bit, so it only needs setting again.  */
  if (m_old_general_regs_only)
    global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
}

/* Implement #pragma GCC aarch64 "arm_neon.h": register the AdvSIMD vector
   tuple types and then the builtin functions that are registered from the
   pragma.  */
void
handle_arm_neon_h (void)
{
  aarch64_simd_switcher simd;

  for (unsigned int idx = 0; idx < ARM_NEON_H_TYPES_LAST; idx++)
    {
      /* Scalar types have no tuple forms.  */
      if (aarch64_scalar_builtin_type_p (aarch64_simd_types[idx].type))
	continue;

      /* Register the x2, x3 and x4 tuples of this vector type.  */
      for (unsigned int num_vectors = 2; num_vectors <= 4; ++num_vectors)
	register_tuple_type (num_vectors, idx);
    }

  aarch64_init_simd_builtin_functions (true);
}

/* Initialize the AdvSIMD builtin types and functions.  Only the first
   call does any work.  */
void
aarch64_init_simd_builtins (void)
{
  if (!aarch64_simd_builtins_initialized_p)
    {
      aarch64_simd_builtins_initialized_p = true;

      aarch64_init_simd_builtin_types ();

      /* Not every AdvSIMD builtin intrinsic is strongly typed yet, so the
	 old __builtin scalar types have to stay.  They can go once all
	 the intrinsics are strongly typed through the qualifier
	 system.  */
      aarch64_init_simd_builtin_scalar_types ();

      aarch64_init_simd_builtin_functions (false);

      /* Under LTO, perform the arm_neon.h pragma registration here.  */
      if (in_lto_p)
	handle_arm_neon_h ();

      /* Initialize the remaining fcmla_laneq intrinsics.  */
      aarch64_init_fcmla_laneq_builtins ();
    }
}

static void
aarch64_init_crc32_builtins ()
{
  tree usi_type = aarch64_simd_builtin_std_type (SImode, qualifier_unsigned);
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
    {
      aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
      tree argtype = aarch64_simd_builtin_std_type (d->mode,
						    qualifier_unsigned);
      tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
      tree attrs = aarch64_get_attributes (FLAG_NONE, d->mode);
      tree fndecl
	= aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}

/* Add builtins for reciprocal square root.  */

void
aarch64_init_builtin_rsqrt (void)
{
  tree fndecl = NULL;
  tree ftype = NULL;

  tree V2SF_type_node = build_vector_type (float_type_node, 2);
  tree V2DF_type_node = build_vector_type (double_type_node, 2);
  tree V4SF_type_node = build_vector_type (float_type_node, 4);

  struct builtin_decls_data
  {
    tree type_node;
    const char *builtin_name;
    int function_code;
  };

  builtin_decls_data bdda[] =
  {
    { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
    { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
    { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
    { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
    { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
  };

  builtin_decls_data *bdd = bdda;
  builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data));

  for (; bdd < bdd_end; bdd++)
  {
    ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
    tree attrs = aarch64_get_attributes (FLAG_FP, TYPE_MODE (bdd->type_node));
    fndecl = aarch64_general_add_builtin (bdd->builtin_name,
					  ftype, bdd->function_code, attrs);
    aarch64_builtin_decls[bdd->function_code] = fndecl;
  }
}

/* Initialize the backend types that support the user-visible __fp16
   type, also initialize a pointer to that type, to be used when
   forming HFAs.  */

static void
aarch64_init_fp16_types (void)
{
  aarch64_fp16_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
  layout_type (aarch64_fp16_type_node);

  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
  aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
}

/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
static void
aarch64_init_bf16_types (void)
{
  aarch64_bf16_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (aarch64_bf16_type_node) = 16;
  SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
  layout_type (aarch64_bf16_type_node);

  lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
  aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
}

/* Pointer authentication builtins that will become NOP on legacy platform.
   Currently, these builtins are for internal use only (libgcc EH unwinder).  */

void
aarch64_init_pauth_hint_builtins (void)
{
  /* Pointer Authentication builtins.  */
  tree ftype_pointer_auth
    = build_function_type_list (ptr_type_node, ptr_type_node,
				unsigned_intDI_type_node, NULL_TREE);
  tree ftype_pointer_strip
    = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);

  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_AUTIA1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_PACIA1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_AUTIB1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_PACIB1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
    = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
				   ftype_pointer_strip,
				   AARCH64_PAUTH_BUILTIN_XPACLRI);
}

/* Initialize the transactional memory extension (TME) builtins.  */
static void
aarch64_init_tme_builtins (void)
{
  tree ftype_uint64_void
    = build_function_type_list (uint64_type_node, NULL);
  tree ftype_void_void
    = build_function_type_list (void_type_node, NULL);
  tree ftype_void_uint64
    = build_function_type_list (void_type_node, uint64_type_node, NULL);

  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
    = aarch64_general_add_builtin ("__builtin_aarch64_tstart",
				   ftype_uint64_void,
				   AARCH64_TME_BUILTIN_TSTART);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
    = aarch64_general_add_builtin ("__builtin_aarch64_ttest",
				   ftype_uint64_void,
				   AARCH64_TME_BUILTIN_TTEST);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
    = aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
				   ftype_void_void,
				   AARCH64_TME_BUILTIN_TCOMMIT);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
    = aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
				   ftype_void_uint64,
				   AARCH64_TME_BUILTIN_TCANCEL);
}

/* Add builtins for Random Number instructions.  */

static void
aarch64_init_rng_builtins (void)
{
  tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
  tree ftype
    = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
    = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
				   AARCH64_BUILTIN_RNG_RNDR);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
    = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
				   AARCH64_BUILTIN_RNG_RNDRRS);
}

/* Per-builtin data for the memory tagging extension (MTE) builtins: the
   function type and the insn code of each builtin.  The entry for builtin
   code C is at index C - AARCH64_MEMTAG_BUILTIN_START - 1.  The table is
   filled in by aarch64_init_memtag_builtins.  */
struct
{
  tree ftype;
  enum insn_code icode;
} aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
			      AARCH64_MEMTAG_BUILTIN_START - 1];

static void
aarch64_init_memtag_builtins (void)
{
  tree fntype = NULL;

#define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
  aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
    = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
				   T, AARCH64_MEMTAG_BUILTIN_##F); \
  aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
			      AARCH64_MEMTAG_BUILTIN_START - 1] = \
				{T, CODE_FOR_##I};

  fntype = build_function_type_list (ptr_type_node, ptr_type_node,
				     uint64_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);

  fntype = build_function_type_list (uint64_type_node, ptr_type_node,
				     uint64_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);

  fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
				     ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);

  fntype = build_function_type_list (ptr_type_node, ptr_type_node,
				     unsigned_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);

  fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);

  fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);

#undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
}

/* Add builtins for Load/store 64 Byte instructions.  */

/* Description of one Load/store 64 Byte builtin.  */
typedef struct
{
  /* Name under which the builtin is registered.  */
  const char *name;
  /* Function code; also the builtin's index into aarch64_builtin_decls.  */
  unsigned int code;
  /* Function type of the builtin.  */
  tree type;
} ls64_builtins_data;

/* The synthesized __arm_data512_t record type.  NULL_TREE until
   aarch64_init_ls64_builtins_types has created it.  */
static GTY(()) tree ls64_arm_data_t = NULL_TREE;

static void
aarch64_init_ls64_builtins_types (void)
{
  /* Synthesize:

     typedef struct {
       uint64_t val[8];
     } __arm_data512_t;  */
  const char *tuple_type_name = "__arm_data512_t";
  tree node_type = get_typenode_from_name (UINT64_TYPE);
  tree array_type = build_array_type_nelts (node_type, 8);
  SET_TYPE_MODE (array_type, V8DImode);

  gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
  gcc_assert (TYPE_ALIGN (array_type) == 64);

  tree field = build_decl (input_location, FIELD_DECL,
                           get_identifier ("val"), array_type);

  ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
                         tuple_type_name,
                         make_array_slice (&field, 1));

  gcc_assert (TYPE_MODE (ls64_arm_data_t) == V8DImode);
  gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));
  gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == 64);
}

static void
aarch64_init_ls64_builtins (void)
{
  aarch64_init_ls64_builtins_types ();

  ls64_builtins_data data[4] = {
    {"__builtin_aarch64_ld64b", AARCH64_LS64_BUILTIN_LD64B,
     build_function_type_list (ls64_arm_data_t,
                               const_ptr_type_node, NULL_TREE)},
    {"__builtin_aarch64_st64b", AARCH64_LS64_BUILTIN_ST64B,
     build_function_type_list (void_type_node, ptr_type_node,
                               ls64_arm_data_t, NULL_TREE)},
    {"__builtin_aarch64_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
     build_function_type_list (uint64_type_node, ptr_type_node,
                               ls64_arm_data_t, NULL_TREE)},
    {"__builtin_aarch64_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
     build_function_type_list (uint64_type_node, ptr_type_node,
                               ls64_arm_data_t, NULL_TREE)},
  };

  for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
    aarch64_builtin_decls[data[i].code]
      = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code);
}

/* Implement #pragma GCC aarch64 "arm_acle.h".  The Load/store 64 Byte
   builtins are only registered when TARGET_LS64 holds.  */
void
handle_arm_acle_h (void)
{
  if (!TARGET_LS64)
    return;

  aarch64_init_ls64_builtins ();
}

/* Initialize fpsr fpcr getters and setters.  */

static void
aarch64_init_fpsr_fpcr_builtins (void)
{
  tree ftype_set
    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
  tree ftype_get
    = build_function_type_list (unsigned_type_node, NULL);

  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
				   ftype_get,
				   AARCH64_BUILTIN_GET_FPCR);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
				   ftype_set,
				   AARCH64_BUILTIN_SET_FPCR);
  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
				   ftype_get,
				   AARCH64_BUILTIN_GET_FPSR);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
				   ftype_set,
				   AARCH64_BUILTIN_SET_FPSR);

  ftype_set
    = build_function_type_list (void_type_node, long_long_unsigned_type_node,
				NULL);
  ftype_get
    = build_function_type_list (long_long_unsigned_type_node, NULL);

  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr64",
				   ftype_get,
				   AARCH64_BUILTIN_GET_FPCR64);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr64",
				   ftype_set,
				   AARCH64_BUILTIN_SET_FPCR64);
  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr64",
				   ftype_get,
				   AARCH64_BUILTIN_GET_FPSR64);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr64",
				   ftype_set,
				   AARCH64_BUILTIN_SET_FPSR64);
}

/* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group.  */

void
aarch64_general_init_builtins (void)
{
  aarch64_init_fpsr_fpcr_builtins ();

  aarch64_init_fp16_types ();

  aarch64_init_bf16_types ();

  {
    aarch64_simd_switcher simd;
    aarch64_init_simd_builtins ();
  }

  aarch64_init_crc32_builtins ();
  aarch64_init_builtin_rsqrt ();
  aarch64_init_rng_builtins ();

  tree ftype_jcvt
    = build_function_type_list (intSI_type_node, double_type_node, NULL);
  aarch64_builtin_decls[AARCH64_JSCVT]
    = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
				   AARCH64_JSCVT);

  /* Initialize pointer authentication builtins which are backed by instructions
     in NOP encoding space.

     NOTE: these builtins are supposed to be used by libgcc unwinder only, as
     there is no support on return address signing under ILP32, we don't
     register them.  */
  if (!TARGET_ILP32)
    aarch64_init_pauth_hint_builtins ();

  if (TARGET_TME)
    aarch64_init_tme_builtins ();

  if (TARGET_MEMTAG)
    aarch64_init_memtag_builtins ();
}

/* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.
   Return the declaration recorded for CODE, or error_mark_node if CODE is
   out of range.  */
tree
aarch64_general_builtin_decl (unsigned code, bool)
{
  return (code < AARCH64_BUILTIN_MAX
	  ? aarch64_builtin_decls[code]
	  : error_mark_node);
}

/* How aarch64_simd_expand_args should handle each argument of a builtin
   call.  The array passed to that function holds one entry per argument
   and is terminated by SIMD_ARG_STOP.  */
typedef enum
{
  /* Expand the argument and copy it to a register of the operand's mode
     if the operand predicate rejects it.  */
  SIMD_ARG_COPY_TO_REG,
  SIMD_ARG_CONSTANT,
  /* A lane index into the previous operand.  */
  SIMD_ARG_LANE_INDEX,
  /* A lane index that is bounds-checked against the builtin's mode.  */
  SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
  SIMD_ARG_LANE_PAIR_INDEX,
  SIMD_ARG_LANE_QUADTUP_INDEX,
  /* Marks the end of the argument list.  */
  SIMD_ARG_STOP
} builtin_simd_arg;


/* Expand the builtin call EXP into the instruction ICODE.

   ARGS describes, per call argument, how the argument has to be turned
   into an instruction operand; the array is terminated by SIMD_ARG_STOP.
   HAVE_RETVAL is 1 if operand 0 of the instruction is the result of the
   call and 0 otherwise.  When there is a result, TARGET is used for it if
   it is non-null, has the right mode and satisfies the operand predicate;
   otherwise a new register is allocated.  BUILTIN_MODE is the mode used to
   bounds-check SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX arguments.

   Return the (possibly replaced) TARGET on success, const0_rtx after
   reporting an argument that is not a valid constant immediate, or
   NULL_RTX if no instruction pattern was generated.  */
static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
			  tree exp, builtin_simd_arg *args,
			  machine_mode builtin_mode)
{
  rtx pat;
  rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand.  */
  int opc = 0;

  if (have_retval)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[opc++] = target;
    }

  /* OPC indexes instruction operands; the matching call argument and ARGS
     entry are at index OPC - HAVE_RETVAL.  */
  for (;;)
    {
      builtin_simd_arg thisarg = args[opc - have_retval];

      if (thisarg == SIMD_ARG_STOP)
	break;
      else
	{
	  tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
	  machine_mode mode = insn_data[icode].operand[opc].mode;
	  op[opc] = expand_normal (arg);

	  switch (thisarg)
	    {
	    case SIMD_ARG_COPY_TO_REG:
	      if (POINTER_TYPE_P (TREE_TYPE (arg)))
		op[opc] = convert_memory_address (Pmode, op[opc]);
	      /*gcc_assert (GET_MODE (op[opc]) == mode); */
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		op[opc] = copy_to_mode_reg (mode, op[opc]);
	      break;

	    case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
	      gcc_assert (opc > 1);
	      if (CONST_INT_P (op[opc]))
		{
		  /* The valid lane range comes from BUILTIN_MODE here, not
		     from the mode of the previous operand.  */
		  unsigned int nunits
		    = GET_MODE_NUNITS (builtin_mode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (builtin_mode,
						     INTVAL (op[opc]));
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_LANE_INDEX:
	      /* Must be a previous operand into which this is an index.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_LANE_PAIR_INDEX:
	      /* Must be a previous operand into which this is an index and
		 index is restricted to nunits / 2.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  int lane = INTVAL (op[opc]);
		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
					  SImode);
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;
	    case SIMD_ARG_LANE_QUADTUP_INDEX:
	      /* Must be a previous operand into which this is an index and
		 index is restricted to nunits / 4.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  int lane = INTVAL (op[opc]);
		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
					  SImode);
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;
	    case SIMD_ARG_CONSTANT:
constant_arg:
	      /* Shared tail of all immediate-like arguments: the operand
		 must satisfy the instruction's predicate as it stands.  The
		 reported argument number is 1-based.  */
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
	      {
		error_at (EXPR_LOCATION (exp),
			  "argument %d must be a constant immediate",
			  opc + 1 - have_retval);
		return const0_rtx;
	      }
	      break;

	    case SIMD_ARG_STOP:
	      gcc_unreachable ();
	    }

	  opc++;
	}
    }

  /* OPC is now the total number of operands collected (including the
     result operand, if any); call the generator with that many.  */
  switch (opc)
    {
    case 1:
      pat = GEN_FCN (icode) (op[0]);
      break;

    case 2:
      pat = GEN_FCN (icode) (op[0], op[1]);
      break;

    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;

    case 4:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;

    case 5:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;

    case 6:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;

    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return target;
}

/* Expand the call EXP to the AArch64 AdvSIMD builtin (intrinsic) FCODE,
   using TARGET for the result where that is suitable.

   AARCH64_SIMD_BUILTIN_LANE_CHECK is handled specially: it only diagnoses
   bad arguments and never generates RTL.  Every other builtin is described
   by its entry in aarch64_simd_builtin_data; the qualifiers found there
   are translated into a builtin_simd_arg array that is handed to
   aarch64_simd_expand_args.  */
rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
{
  if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
    {
      rtx total = expand_normal (CALL_EXPR_ARG (exp, 0));
      rtx elt = expand_normal (CALL_EXPR_ARG (exp, 1));
      bool sizes_ok = (CONST_INT_P (total)
		       && CONST_INT_P (elt)
		       && UINTVAL (elt) != 0
		       && UINTVAL (total) != 0);
      if (!sizes_ok)
	{
	  error_at (EXPR_LOCATION (exp),
		    "total size and element size must be a nonzero "
		    "constant immediate");
	  return const0_rtx;
	}

      rtx lane = expand_normal (CALL_EXPR_ARG (exp, 2));
      if (!CONST_INT_P (lane))
	{
	  error_at (EXPR_LOCATION (exp),
		    "lane index must be a constant immediate");
	  return const0_rtx;
	}

      /* The number of lanes is the total size divided by the element
	 size.  */
      aarch64_simd_lane_bounds (lane, 0, UINTVAL (total) / UINTVAL (elt),
				exp);
      /* Don't generate any RTL.  */
      return const0_rtx;
    }

  aarch64_simd_builtin_datum *d
    = &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
  enum insn_code icode = d->code;

  /* 1 if the builtin returns void, 0 otherwise.  */
  const int void_p = (d->qualifiers[0] & qualifier_void) ? 1 : 0;
  const int limit = insn_data[icode].n_operands + void_p;

  builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
  int k = 1;

  /* Four arrays are in play, each indexed differently:
       qualifiers - element 0 always describes the function return type.
       operands   - element 0 is the return value operand if the function
		    is non-void, else the operand for the first argument.
       call args  - element 0 always holds the first argument.
       args       - element 0 is always reserved for the return type.  */
  while (k < limit)
    {
      int quals = d->qualifiers[k];
      builtin_simd_arg kind = SIMD_ARG_COPY_TO_REG;

      if (quals & qualifier_lane_index)
	kind = SIMD_ARG_LANE_INDEX;
      else if (quals & qualifier_lane_pair_index)
	kind = SIMD_ARG_LANE_PAIR_INDEX;
      else if (quals & qualifier_lane_quadtup_index)
	kind = SIMD_ARG_LANE_QUADTUP_INDEX;
      else if (quals & qualifier_struct_load_store_lane_index)
	kind = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
      else if (quals & qualifier_immediate)
	kind = SIMD_ARG_CONSTANT;
      else if (quals & qualifier_maybe_immediate)
	{
	  int opno = k - void_p;
	  rtx arg = expand_normal (CALL_EXPR_ARG (exp, k - 1));
	  /* Treat the argument as a constant only if it is one and the
	     operand predicate accepts it.  */
	  if (CONST_INT_P (arg)
	      && (*insn_data[icode].operand[opno].predicate)
		   (arg, insn_data[icode].operand[opno].mode))
	    kind = SIMD_ARG_CONSTANT;
	}

      args[k] = kind;
      k++;
    }
  args[k] = SIMD_ARG_STOP;

  /* aarch64_simd_expand_args wants 0 for a void function and 1 for a
     non-void one, and an argument array that starts at the first call
     argument.  */
  return aarch64_simd_expand_args (target, icode, !void_p, exp, &args[1],
				   d->mode);
}

/* Expand the call EXP to the CRC32 builtin FCODE.  Both call arguments
   are expanded and, where the instruction predicates require it, copied
   into registers.  TARGET is used for the result if it is suitable,
   otherwise a new register is allocated.  Return the result register, or
   NULL_RTX if no instruction pattern was generated.  */
rtx
aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
{
  /* The data table starts at the code following
     AARCH64_CRC32_BUILTIN_BASE.  */
  const int idx = fcode - (AARCH64_CRC32_BUILTIN_BASE + 1);
  enum insn_code icode = aarch64_crc_builtin_data[idx].icode;

  rtx lhs = expand_normal (CALL_EXPR_ARG (exp, 0));
  rtx rhs = expand_normal (CALL_EXPR_ARG (exp, 1));

  machine_mode result_mode = insn_data[icode].operand[0].mode;
  machine_mode lhs_mode = insn_data[icode].operand[1].mode;
  machine_mode rhs_mode = insn_data[icode].operand[2].mode;

  bool target_ok
    = (target
       && GET_MODE (target) == result_mode
       && (*insn_data[icode].operand[0].predicate) (target, result_mode));
  if (!target_ok)
    target = gen_reg_rtx (result_mode);

  /* Each input either has the operand's mode already or is modeless
     (VOIDmode).  */
  gcc_assert ((GET_MODE (lhs) == lhs_mode || GET_MODE (lhs) == VOIDmode)
	      && (GET_MODE (rhs) == rhs_mode || GET_MODE (rhs) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (lhs, lhs_mode))
    lhs = copy_to_mode_reg (lhs_mode, lhs);
  if (!(*insn_data[icode].operand[2].predicate) (rhs, rhs_mode))
    rhs = copy_to_mode_reg (rhs_mode, rhs);

  rtx pat = GEN_FCN (icode) (target, lhs, rhs);
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Expand the call EXP to the reciprocal square root builtin FCODE.  The
   result goes to TARGET, or to a new register with the mode of the
   expanded argument if TARGET is null.  Return the result register.  */

static rtx
aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
{
  rtx operand = expand_normal (CALL_EXPR_ARG (exp, 0));

  /* Pick the generator that matches the builtin's mode.  */
  rtx (*gen_rsqrt) (rtx, rtx);
  if (fcode == AARCH64_BUILTIN_RSQRT_DF)
    gen_rsqrt = gen_rsqrtdf2;
  else if (fcode == AARCH64_BUILTIN_RSQRT_SF)
    gen_rsqrt = gen_rsqrtsf2;
  else if (fcode == AARCH64_BUILTIN_RSQRT_V2DF)
    gen_rsqrt = gen_rsqrtv2df2;
  else if (fcode == AARCH64_BUILTIN_RSQRT_V2SF)
    gen_rsqrt = gen_rsqrtv2sf2;
  else if (fcode == AARCH64_BUILTIN_RSQRT_V4SF)
    gen_rsqrt = gen_rsqrtv4sf2;
  else
    gcc_unreachable ();

  if (target == NULL_RTX)
    target = gen_reg_rtx (GET_MODE (operand));

  emit_insn (gen_rsqrt (target, operand));
  return target;
}

/* Expand the FCMLA lane expression EXP with code FCODE, with the result
   going to TARGET if that is convenient.  The fourth call argument must be
   a constant lane index; otherwise an error is reported and const0_rtx is
   returned.  Return NULL_RTX if no instruction pattern was generated.  */

rtx
aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
{
  aarch64_fcmla_laneq_builtin_datum *d
    = &aarch64_fcmla_lane_builtin_data[fcode
				       - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE
				       - 1];
  machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();

  /* The first two arguments have the builtin's mode, the third one the
     mode twice as wide.  */
  rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
  rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
  rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
  rtx lane_idx = expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX, VOIDmode,
			      EXPAND_INITIALIZER);

  /* The lane index has to be a constant.  */
  if (!CONST_INT_P (lane_idx))
    {
      error_at (EXPR_LOCATION (exp),
		"argument %d must be a constant immediate", 4);
      return const0_rtx;
    }

  /* The lane index has to be within half the units of the wide mode.  */
  int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
  aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);

  /* Pick the half of OP2 that holds the lane and rebase the index.  */
  int lane = INTVAL (lane_idx);
  const int half_lanes = nunits / 4;

  if (lane < half_lanes)
    op2 = simplify_gen_subreg (d->mode, op2, quadmode,
			       subreg_lowpart_offset (d->mode, quadmode));
  else
    {
      /* Select the upper 64 bits, either a V2SF or V4HF.  This is quite
	 messy: the operation is simple but has no simple RTL pattern, and
	 it seems hard to define with a single RTL pattern.  The target
	 generic gen_highpart_mode generates code that isn't optimal.  */
      /* NOTE(review): this first pseudo is overwritten by the subreg
	 below before it is ever used.  */
      rtx narrow = gen_reg_rtx (d->mode);
      rtx upper_di = gen_reg_rtx (DImode);
      narrow = simplify_gen_subreg (d->mode, op2, quadmode,
				    subreg_lowpart_offset (d->mode,
							   quadmode));
      narrow = simplify_gen_subreg (V2DImode, narrow, d->mode, 0);

      rtx upper_lane = BYTES_BIG_ENDIAN ? const0_rtx : const1_rtx;
      emit_insn (gen_aarch64_get_lanev2di (upper_di, narrow, upper_lane));
      op2 = simplify_gen_subreg (d->mode, upper_di, GET_MODE (upper_di), 0);

      /* The index is now relative to the upper half.  */
      lane -= half_lanes;
    }

  /* Keep to GCC-vector-extension lane indices in the RTL.  Only
     nunits / 4 lanes (max nunits in the range check) are valid, which
     means only 0-1, so only the order in a V2 mode matters.  */
  lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);

  bool target_ok = (target
		    && REG_P (target)
		    && GET_MODE (target) == d->mode);
  if (!target_ok)
    target = gen_reg_rtx (d->mode);

  /* Only the lane forms take the lane index as an extra operand.  */
  rtx pat = NULL_RTX;
  if (d->lane)
    pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
  else
    pat = GEN_FCN (d->icode) (target, op0, op1, op2);

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Function to expand an expression EXP which calls one of the Transactional
   Memory Extension (TME) builtins FCODE with the result going to TARGET.

   TSTART and TTEST always place their result in a new DImode register,
   which is returned.  TCOMMIT and TCANCEL produce no value and return
   TARGET unchanged.  TCANCEL requires its argument to be a constant that
   fits in 16 bits, i.e. lies in [0, 65535]; otherwise an error is reported
   and const0_rtx is returned.  */
static rtx
aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
{
  switch (fcode)
    {
    case AARCH64_TME_BUILTIN_TSTART:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
      break;

    case AARCH64_TME_BUILTIN_TTEST:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
      break;

    case AARCH64_TME_BUILTIN_TCOMMIT:
      emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
      break;

    case AARCH64_TME_BUILTIN_TCANCEL:
      {
	tree arg0 = CALL_EXPR_ARG (exp, 0);
	rtx op0 = expand_normal (arg0);
	/* The largest 16-bit immediate is 65535; 65536 needs 17 bits and
	   must be rejected here rather than reach the instruction.  */
	if (CONST_INT_P (op0) && UINTVAL (op0) <= 65535)
	  emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
	else
	  {
	    error_at (EXPR_LOCATION (exp),
		      "argument must be a 16-bit constant immediate");
	    return const0_rtx;
	  }
      }
      break;

    default :
      gcc_unreachable ();
    }
  return target;
}

/* Function to expand an expression EXP which calls one of the Load/Store
   64 Byte extension (LS64) builtins FCODE with the result going to TARGET.

   LD64B returns the loaded V8DImode value, ST64BV and ST64BV0 return a
   DImode result, and ST64B produces no value and returns const0_rtx.  In
   every case the first call argument is the address and is only read by
   the instruction, so it is always passed as an input operand.  */
static rtx
aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target)
{
  expand_operand ops[3];

  switch (fcode)
    {
    case AARCH64_LS64_BUILTIN_LD64B:
      {
        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        create_output_operand (&ops[0], target, V8DImode);
        create_input_operand (&ops[1], op0, DImode);
        expand_insn (CODE_FOR_ld64b, 2, ops);
        return ops[0].value;
      }
    case AARCH64_LS64_BUILTIN_ST64B:
      {
        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
        /* OP0 is the store address.  It must be an input operand: an
           output operand is only a suggestion for where a result could be
           stored, so the address value could be dropped in favour of a
           fresh register.  */
        create_input_operand (&ops[0], op0, DImode);
        create_input_operand (&ops[1], op1, V8DImode);
        expand_insn (CODE_FOR_st64b, 2, ops);
        return const0_rtx;
      }
    case AARCH64_LS64_BUILTIN_ST64BV:
      {
        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
        create_output_operand (&ops[0], target, DImode);
        create_input_operand (&ops[1], op0, DImode);
        create_input_operand (&ops[2], op1, V8DImode);
        expand_insn (CODE_FOR_st64bv, 3, ops);
        return ops[0].value;
      }
    case AARCH64_LS64_BUILTIN_ST64BV0:
      {
        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
        create_output_operand (&ops[0], target, DImode);
        create_input_operand (&ops[1], op0, DImode);
        create_input_operand (&ops[2], op1, V8DImode);
        expand_insn (CODE_FOR_st64bv0, 3, ops);
        return ops[0].value;
      }
    }

  gcc_unreachable ();
}

/* Expand the random number builtin EXP with code FCODE, putting the status
   result in TARGET.  The random value itself is stored through the pointer
   passed as the first call argument.  If IGNORE is true the status result
   is unused and no code is generated for it.  Return NULL_RTX if no
   instruction pattern was generated.  */

rtx
aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
{
  enum insn_code icode;
  switch (fcode)
    {
    case AARCH64_BUILTIN_RNG_RNDR:
      icode = CODE_FOR_aarch64_rndr;
      break;
    case AARCH64_BUILTIN_RNG_RNDRRS:
      icode = CODE_FOR_aarch64_rndrrs;
      break;
    default:
      gcc_unreachable ();
    }

  rtx rand = gen_reg_rtx (DImode);
  rtx pat = GEN_FCN (icode) (rand);
  if (!pat)
    return NULL_RTX;

  /* Build the DImode memory reference the random value is written to.  */
  rtx res_addr = expand_normal (CALL_EXPR_ARG (exp, 0));
  res_addr = convert_memory_address (Pmode, res_addr);
  rtx res_mem = gen_rtx_MEM (DImode, res_addr);

  emit_insn (pat);
  emit_move_insn (res_mem, rand);

  /* Only generate the CSET code if the status result is used.  */
  if (!ignore)
    {
      rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
      rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
      emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
    }

  return target;
}

/* Expand the expression EXP that calls the MEMTAG built-in FCODE, with the
   result going to TARGET.  An error is reported and const0_rtx returned
   under ILP32, and when the second argument of INC_TAG is rejected by the
   instruction's operand predicate.  Return NULL_RTX if no instruction
   pattern was generated.  */
static rtx
aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
{
  if (TARGET_ILP32)
    {
      error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");
      return const0_rtx;
    }

  const int idx = fcode - AARCH64_MEMTAG_BUILTIN_START - 1;
  enum insn_code icode = aarch64_memtag_builtin_data[idx].icode;

  /* Every builtin takes a first argument, which is forced into a register
     and converted to DImode (as unsigned).  */
  rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
  machine_mode mode0 = GET_MODE (op0);
  if (mode0 == VOIDmode)
    mode0 = DImode;
  op0 = force_reg (mode0, op0);
  op0 = convert_to_mode (DImode, op0, true);

  rtx pat = NULL;

  switch (fcode)
    {
    case AARCH64_MEMTAG_BUILTIN_GET_TAG:
      /* The result overwrites the register holding the argument.  */
      target = op0;
      pat = GEN_FCN (icode) (target, op0, const0_rtx);
      break;

    case AARCH64_MEMTAG_BUILTIN_SET_TAG:
      pat = GEN_FCN (icode) (op0, op0, const0_rtx);
      break;

    case AARCH64_MEMTAG_BUILTIN_IRG:
    case AARCH64_MEMTAG_BUILTIN_GMI:
    case AARCH64_MEMTAG_BUILTIN_SUBP:
    case AARCH64_MEMTAG_BUILTIN_INC_TAG:
      {
	bool target_ok
	  = (target
	     && GET_MODE (target) == DImode
	     && (*insn_data[icode].operand[0].predicate) (target, DImode));
	if (!target_ok)
	  target = gen_reg_rtx (DImode);

	rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));

	if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
	  {
	    /* The second argument is used as is and therefore has to
	       satisfy the predicate of operand 3.  */
	    if (!(*insn_data[icode].operand[3].predicate) (op1, QImode))
	      {
		error_at (EXPR_LOCATION (exp),
			  "argument %d must be a constant immediate "
			  "in range [0,15]", 2);
		return const0_rtx;
	      }
	    pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
	  }
	else
	  {
	    /* The second argument is treated like the first one.  */
	    machine_mode mode1 = GET_MODE (op1);
	    if (mode1 == VOIDmode)
	      mode1 = DImode;
	    op1 = force_reg (mode1, op1);
	    op1 = convert_to_mode (DImode, op1, true);
	    pat = GEN_FCN (icode) (target, op0, op1);
	  }
	break;
      }

    default:
      gcc_unreachable();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Expand an expression EXP as fpsr or fpcr setter (depending on
   UNSPEC) using MODE.  */
static void
aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp)
{
  tree arg = CALL_EXPR_ARG (exp, 0);
  rtx op = force_reg (mode, expand_normal (arg));
  emit_insn (gen_aarch64_set (unspec, mode, op));
}

/* Expand a getter of FPSR or FPCR (selected by the instruction ICODE)
   that produces a value of mode MODE.  TARGET is a suggestion for where to
   put the result.  Return the rtx that actually holds it.  */
static rtx
aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode,
				 rtx target)
{
  expand_operand result;

  create_output_operand (&result, target, mode);
  expand_insn (icode, 1, &result);

  return result.value;
}

/* Expand an expression EXP that calls built-in function FCODE,
   with result going to TARGET if that's convenient.  IGNORE is true
   if the result of the builtin is ignored.

   Builtins that need individual treatment are handled by the switch
   below.  The remaining ones are recognized afterwards, either by the
   range their code falls into or by explicit comparison, and forwarded to
   the expander of their group.  A code that matches nothing is a bug.  */
rtx
aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
				int ignore)
{
  int icode;
  rtx op0;
  tree arg0;

  switch (fcode)
    {
    /* FPCR/FPSR accessors.  The getters return the register holding the
       value read; the setters return TARGET unchanged.  */
    case AARCH64_BUILTIN_GET_FPCR:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi,
					      SImode, target);
    case AARCH64_BUILTIN_SET_FPCR:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp);
      return target;
    case AARCH64_BUILTIN_GET_FPSR:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi,
					      SImode, target);
    case AARCH64_BUILTIN_SET_FPSR:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp);
      return target;
    case AARCH64_BUILTIN_GET_FPCR64:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi,
					      DImode, target);
    case AARCH64_BUILTIN_SET_FPCR64:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp);
      return target;
    case AARCH64_BUILTIN_GET_FPSR64:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi,
					      DImode, target);
    case AARCH64_BUILTIN_SET_FPSR64:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp);
      return target;
    /* Pointer authentication builtins.  The generated instructions take
       no explicit operands, so the arguments are moved into fixed hard
       registers first and the result is returned in a hard register.  */
    case AARCH64_PAUTH_BUILTIN_AUTIA1716:
    case AARCH64_PAUTH_BUILTIN_PACIA1716:
    case AARCH64_PAUTH_BUILTIN_AUTIB1716:
    case AARCH64_PAUTH_BUILTIN_PACIB1716:
    case AARCH64_PAUTH_BUILTIN_XPACLRI:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = force_reg (Pmode, expand_normal (arg0));

      if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
	{
	  /* Single argument, passed and returned in R30.  */
	  rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
	  icode = CODE_FOR_xpaclri;
	  emit_move_insn (lr, op0);
	  emit_insn (GEN_FCN (icode) ());
	  return lr;
	}
      else
	{
	  tree arg1 = CALL_EXPR_ARG (exp, 1);
	  rtx op1 = force_reg (Pmode, expand_normal (arg1));
	  switch (fcode)
	    {
	    case AARCH64_PAUTH_BUILTIN_AUTIA1716:
	      icode = CODE_FOR_autia1716;
	      break;
	    case AARCH64_PAUTH_BUILTIN_AUTIB1716:
	      icode = CODE_FOR_autib1716;
	      break;
	    case AARCH64_PAUTH_BUILTIN_PACIA1716:
	      icode = CODE_FOR_pacia1716;
	      break;
	    case AARCH64_PAUTH_BUILTIN_PACIB1716:
	      icode = CODE_FOR_pacib1716;
	      break;
	    default:
	      icode = 0;
	      gcc_unreachable ();
	    }

	  /* First argument goes to R17, second to R16; the result is
	     returned in R17.  */
	  rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
	  rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
	  emit_move_insn (x17_reg, op0);
	  emit_move_insn (x16_reg, op1);
	  emit_insn (GEN_FCN (icode) ());
	  return x17_reg;
	}

    /* __builtin_aarch64_jcvtzs: DFmode input, SImode result.  */
    case AARCH64_JSCVT:
      {
	expand_operand ops[2];
	create_output_operand (&ops[0], target, SImode);
	op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
	create_input_operand (&ops[1], op0, DFmode);
	expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
	return ops[0].value;
      }

    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
      return aarch64_expand_fcmla_builtin (exp, target, fcode);
    case AARCH64_BUILTIN_RNG_RNDR:
    case AARCH64_BUILTIN_RNG_RNDRRS:
      return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
    }

  /* Groups identified by a contiguous range of codes.  */
  if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
    return aarch64_simd_expand_builtin (fcode, exp, target);
  else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
    return aarch64_crc32_expand_builtin (fcode, exp, target);

  /* Reciprocal square root builtins.  */
  if (fcode == AARCH64_BUILTIN_RSQRT_DF
      || fcode == AARCH64_BUILTIN_RSQRT_SF
      || fcode == AARCH64_BUILTIN_RSQRT_V2DF
      || fcode == AARCH64_BUILTIN_RSQRT_V2SF
      || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
    return aarch64_expand_builtin_rsqrt (fcode, exp, target);

  /* Transactional Memory Extension builtins.  */
  if (fcode == AARCH64_TME_BUILTIN_TSTART
      || fcode == AARCH64_TME_BUILTIN_TCOMMIT
      || fcode == AARCH64_TME_BUILTIN_TTEST
      || fcode == AARCH64_TME_BUILTIN_TCANCEL)
    return aarch64_expand_builtin_tme (fcode, exp, target);

  /* Load/Store 64 Byte extension builtins.  */
  if (fcode == AARCH64_LS64_BUILTIN_LD64B
      || fcode == AARCH64_LS64_BUILTIN_ST64B
      || fcode == AARCH64_LS64_BUILTIN_ST64BV
      || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
    return aarch64_expand_builtin_ls64 (fcode, exp, target);

  /* Memory Tagging Extension builtins.  */
  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_expand_builtin_memtag (fcode, exp, target);

  gcc_unreachable ();
}

/* Return the decl of the AdvSIMD builtin that implements the combined
   function FN on vectors, where TYPE_OUT is the vector type of the result
   and TYPE_IN the vector type of the argument.  Return NULL_TREE if either
   type is not a vector type or no builtin matches the mode combination.

   AARCH64_CHECK_BUILTIN_MODE (C, N) is redefined before each group of
   cases so that it tests the in/out mode pairing that group needs; C is
   the number of elements and N the element size letter.  */
tree
aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
				     tree type_in)
{
  machine_mode in_mode, out_mode;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (type_out);
  in_mode = TYPE_MODE (type_in);

/* Placeholder definition; it is replaced before the first use below.  */
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
/* Select the v2df, v4sf or v2sf variant of unary builtin N according to
   the current AARCH64_CHECK_BUILTIN_MODE, or NULL_TREE if none matches.  */
#define AARCH64_FIND_FRINT_VARIANT(N) \
  (AARCH64_CHECK_BUILTIN_MODE (2, D) \
    ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
    : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
	? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
	: (AARCH64_CHECK_BUILTIN_MODE (2, S) \
	   ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
	   : NULL_TREE)))
  switch (fn)
    {
/* Float vector in, float vector of the same mode out.  */
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode)
    CASE_CFN_FLOOR:
      return AARCH64_FIND_FRINT_VARIANT (floor);
    CASE_CFN_CEIL:
      return AARCH64_FIND_FRINT_VARIANT (ceil);
    CASE_CFN_TRUNC:
      return AARCH64_FIND_FRINT_VARIANT (btrunc);
    CASE_CFN_ROUND:
      return AARCH64_FIND_FRINT_VARIANT (round);
    CASE_CFN_NEARBYINT:
      return AARCH64_FIND_FRINT_VARIANT (nearbyint);
    CASE_CFN_SQRT:
      return AARCH64_FIND_FRINT_VARIANT (sqrt);
/* Integer vector in, SImode-element vector with the same element count
   out.  */
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##SImode && in_mode == V##C##N##Imode)
    CASE_CFN_CLZ:
      {
	if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
	return NULL_TREE;
      }
    CASE_CFN_CTZ:
      {
	if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
	return NULL_TREE;
      }
/* Float vector in, integer vector with the same element count and
   element size out.  */
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode)
    CASE_CFN_IFLOOR:
    CASE_CFN_LFLOOR:
    CASE_CFN_LLFLOOR:
      {
	enum aarch64_builtins builtin;
	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
	else
	  return NULL_TREE;

	return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_ICEIL:
    CASE_CFN_LCEIL:
    CASE_CFN_LLCEIL:
      {
	enum aarch64_builtins builtin;
	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
	else
	  return NULL_TREE;

	return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_IROUND:
    CASE_CFN_LROUND:
    CASE_CFN_LLROUND:
      {
	enum aarch64_builtins builtin;
	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	  builtin =	AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  builtin =	AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  builtin =	AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
	else
	  return NULL_TREE;

	return aarch64_builtin_decls[builtin];
      }
    default:
      return NULL_TREE;
    }

  return NULL_TREE;
}

/* Return the reciprocal square root builtin that corresponds to the
   vector square root builtin FN, or NULL_TREE if FN is not one of the
   v2df, v2sf or v4sf square root builtins.  */

tree
aarch64_general_builtin_rsqrt (unsigned int fn)
{
  switch (fn)
    {
    case AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df:
      return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];

    case AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf:
      return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];

    case AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf:
      return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];

    default:
      return NULL_TREE;
    }
}

/* Return true if the lane check can be removed because no error is going
   to be emitted for it.  ARG0 is the total size, ARG1 the element size and
   ARG2 the lane index.  The check is known to pass only if all three are
   integer constants, both sizes are nonzero and the lane index is below
   the number of lanes, i.e. the total size divided by the element size.  */
static bool
aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
{
  const bool all_constant = (TREE_CODE (arg0) == INTEGER_CST
			     && TREE_CODE (arg1) == INTEGER_CST
			     && TREE_CODE (arg2) == INTEGER_CST);
  if (!all_constant)
    return false;

  auto total = wi::to_widest (arg0);
  auto elt = wi::to_widest (arg1);
  if (total == 0 || elt == 0)
    return false;

  auto lane_count = wi::udiv_trunc (total, elt);
  return wi::ltu_p (wi::to_widest (arg2), lane_count);
}

/* Redefine VAR1 so that each use expands to the case label of the builtin
   AARCH64_SIMD_BUILTIN_<T>_<N><A>.  MAP and FLAG are ignored.  This lets
   the switch statements below name builtins with the same macros that
   are used to declare them.  */
#undef VAR1
#define VAR1(T, N, MAP, FLAG, A) \
  case AARCH64_SIMD_BUILTIN_##T##_##N##A:

/* Try to fold a call to the built-in function with subcode FCODE.  The
   function is passed the N_ARGS arguments in ARGS and it returns a value
   of type TYPE.  Return the new expression on success and NULL_TREE on
   failure.

   The case labels for the SIMD builtins are produced by the VAR1 and
   BUILTIN_* macros.  */
tree
aarch64_general_fold_builtin (unsigned int fcode, tree type,
			      unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
{
  switch (fcode)
    {
      /* Vector absolute value folds to ABS_EXPR.  */
      BUILTIN_VDQF (UNOP, abs, 2, ALL)
	return fold_build1 (ABS_EXPR, type, args[0]);
      /* Integer to floating-point conversion folds to FLOAT_EXPR.  */
      VAR1 (UNOP, floatv2si, 2, ALL, v2sf)
      VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
      VAR1 (UNOP, floatv2di, 2, ALL, v2df)
	return fold_build1 (FLOAT_EXPR, type, args[0]);
      /* A lane check that is known to pass folds away to nothing;
	 otherwise it is kept so that it can be diagnosed later.  */
      case AARCH64_SIMD_BUILTIN_LANE_CHECK:
	gcc_assert (n_args == 3);
	if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
	  return void_node;
	break;
      default:
	break;
    }

  return NULL_TREE;
}

/* Return the aarch64_simd_type associated with the ld1/st1 builtin whose
   function code is FCODE.  Each load code and its matching store code
   share one entry; the LOAD1/STORE1 codes map to the signed-integer,
   float and bfloat types, the _U codes to the unsigned-integer types and
   the _P codes to the polynomial types.  FCODE must be one of the codes
   listed here: any other value reaches gcc_unreachable.  */
enum aarch64_simd_type
get_mem_type_for_load_store (unsigned int fcode)
{
  switch (fcode)
  {
    VAR1 (LOAD1, ld1, 0, LOAD, v8qi)
    VAR1 (STORE1, st1, 0, STORE, v8qi)
      return Int8x8_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v16qi)
    VAR1 (STORE1, st1, 0, STORE, v16qi)
      return Int8x16_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v4hi)
    VAR1 (STORE1, st1, 0, STORE, v4hi)
      return Int16x4_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v8hi)
    VAR1 (STORE1, st1, 0, STORE, v8hi)
      return Int16x8_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v2si)
    VAR1 (STORE1, st1, 0, STORE, v2si)
      return Int32x2_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v4si)
    VAR1 (STORE1, st1, 0, STORE, v4si)
      return Int32x4_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v2di)
    VAR1 (STORE1, st1, 0, STORE, v2di)
      return Int64x2_t;
    VAR1 (LOAD1_U, ld1, 0, LOAD, v8qi)
    VAR1 (STORE1_U, st1, 0, STORE, v8qi)
      return Uint8x8_t;
    VAR1 (LOAD1_U, ld1, 0, LOAD, v16qi)
    VAR1 (STORE1_U, st1, 0, STORE, v16qi)
      return Uint8x16_t;
    VAR1 (LOAD1_U, ld1, 0, LOAD, v4hi)
    VAR1 (STORE1_U, st1, 0, STORE, v4hi)
      return Uint16x4_t;
    VAR1 (LOAD1_U, ld1, 0, LOAD, v8hi)
    VAR1 (STORE1_U, st1, 0, STORE, v8hi)
      return Uint16x8_t;
    VAR1 (LOAD1_U, ld1, 0, LOAD, v2si)
    VAR1 (STORE1_U, st1, 0, STORE, v2si)
      return Uint32x2_t;
    VAR1 (LOAD1_U, ld1, 0, LOAD, v4si)
    VAR1 (STORE1_U, st1, 0, STORE, v4si)
      return Uint32x4_t;
    VAR1 (LOAD1_U, ld1, 0, LOAD, v2di)
    VAR1 (STORE1_U, st1, 0, STORE, v2di)
      return Uint64x2_t;
    VAR1 (LOAD1_P, ld1, 0, LOAD, v8qi)
    VAR1 (STORE1_P, st1, 0, STORE, v8qi)
      return Poly8x8_t;
    VAR1 (LOAD1_P, ld1, 0, LOAD, v16qi)
    VAR1 (STORE1_P, st1, 0, STORE, v16qi)
      return Poly8x16_t;
    VAR1 (LOAD1_P, ld1, 0, LOAD, v4hi)
    VAR1 (STORE1_P, st1, 0, STORE, v4hi)
      return Poly16x4_t;
    VAR1 (LOAD1_P, ld1, 0, LOAD, v8hi)
    VAR1 (STORE1_P, st1, 0, STORE, v8hi)
      return Poly16x8_t;
    VAR1 (LOAD1_P, ld1, 0, LOAD, v2di)
    VAR1 (STORE1_P, st1, 0, STORE, v2di)
      return Poly64x2_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v4hf)
    VAR1 (STORE1, st1, 0, STORE, v4hf)
      return Float16x4_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v8hf)
    VAR1 (STORE1, st1, 0, STORE, v8hf)
      return Float16x8_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v4bf)
    VAR1 (STORE1, st1, 0, STORE, v4bf)
      return Bfloat16x4_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v8bf)
    VAR1 (STORE1, st1, 0, STORE, v8bf)
      return Bfloat16x8_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v2sf)
    VAR1 (STORE1, st1, 0, STORE, v2sf)
      return Float32x2_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v4sf)
    VAR1 (STORE1, st1, 0, STORE, v4sf)
      return Float32x4_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v2df)
    VAR1 (STORE1, st1, 0, STORE, v2df)
      return Float64x2_t;
    default:
      /* Callers must only pass one of the ld1/st1 codes above.  */
      gcc_unreachable ();
      break;
  }
}

/* Try to fold STMT, given that it's a call to the built-in function with
   subcode FCODE.  Return the new statement on success and null on
   failure.

   The new statement is only built here; it is not inserted into the
   statement stream by this function (GSI is unused).  Replacements that
   access memory carry over the virtual operands of STMT.  */
gimple *
aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
				     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
{
  gimple *new_stmt = NULL;
  unsigned nargs = gimple_call_num_args (stmt);
  /* Give ARGS a valid target even for a call without arguments.  */
  tree *args = (nargs > 0
		? gimple_call_arg_ptr (stmt, 0)
		: &error_mark_node);

  /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int
     and unsigned int; it will distinguish according to the types of
     the arguments to the __builtin.  */
  switch (fcode)
    {
      BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL)
	new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
					       1, args[0]);
	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
	break;

      /* Lower sqrt builtins to gimple/internal function sqrt.  */
      BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
	new_stmt = gimple_build_call_internal (IFN_SQRT,
					       1, args[0]);
	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
	break;

      /* Lower load Neon builtins to a gimple MEM_REF read.  This is only
	 done for little-endian.  */
      BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
      BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
      BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD)
	if (!BYTES_BIG_ENDIAN)
	  {
	    enum aarch64_simd_type mem_type
	      = get_mem_type_for_load_store (fcode);
	    aarch64_simd_type_info simd_type
	      = aarch64_simd_types[mem_type];
	    tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
							     VOIDmode, true);
	    tree zero = build_zero_cst (elt_ptr_type);
	    /* Use element type alignment.  */
	    tree access_type
	      = build_aligned_type (simd_type.itype,
				    TYPE_ALIGN (simd_type.eltype));
	    new_stmt
	      = gimple_build_assign (gimple_get_lhs (stmt),
				     fold_build2 (MEM_REF,
						  access_type,
						  args[0], zero));
	    /* The replacement reads memory, so it must take over the
	       virtual operands of the call it stands in for.  */
	    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
	    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
	  }
	break;

      /* Lower store Neon builtins to a gimple MEM_REF write.  This is
	 only done for little-endian.  */
      BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
      BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE)
      BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE)
	if (!BYTES_BIG_ENDIAN)
	  {
	    enum aarch64_simd_type mem_type
	      = get_mem_type_for_load_store (fcode);
	    aarch64_simd_type_info simd_type
	      = aarch64_simd_types[mem_type];
	    tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
							     VOIDmode, true);
	    tree zero = build_zero_cst (elt_ptr_type);
	    /* Use element type alignment.  */
	    tree access_type
	      = build_aligned_type (simd_type.itype,
				    TYPE_ALIGN (simd_type.eltype));
	    new_stmt
	      = gimple_build_assign (fold_build2 (MEM_REF, access_type,
						  args[0], zero),
				     args[1]);
	    /* The replacement writes memory, so it must take over the
	       virtual operands of the call it stands in for.  */
	    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
	    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
	  }
	break;

      BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL)
	new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
					       1, args[0]);
	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
	break;
      BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10, ALL)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10, ALL)
	new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
					       1, args[0]);
	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
	break;

      /* Immediate left shift: fold only when the constant amount is
	 below the element precision.  */
      BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, NONE)
	if (TREE_CODE (args[1]) == INTEGER_CST
	    && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
	  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
					  LSHIFT_EXPR, args[0], args[1]);
	break;

      /* Shift by a signed amount: a uniform constant amount becomes a
	 left shift when non-negative and a right shift by the absolute
	 value when negative.  */
      BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, NONE)
      BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, NONE)
	{
	  tree cst = args[1];
	  tree ctype = TREE_TYPE (cst);
	  /* Left shifts can be both scalar or vector, e.g. uint64x1_t is
	     treated as a scalar type not a vector one.  */
	  if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
	    {
	      wide_int wcst = wi::to_wide (cst);
	      tree unit_ty = TREE_TYPE (cst);

	      /* Leave amounts at or beyond the element precision to the
		 builtin itself.  */
	      wide_int abs_cst = wi::abs (wcst);
	      if (wi::geu_p (abs_cst, element_precision (args[0])))
		break;

	      if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
		{
		  tree final_cst;
		  final_cst = wide_int_to_tree (unit_ty, abs_cst);
		  if (TREE_CODE (cst) != INTEGER_CST)
		    final_cst = build_uniform_cst (ctype, final_cst);

		  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						  RSHIFT_EXPR, args[0],
						  final_cst);
		}
	      else
		new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						LSHIFT_EXPR, args[0], args[1]);
	    }
	}
	break;

      /* Immediate right shifts: fold only when the constant amount is
	 below the element precision.  */
      BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
      VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
      BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
      VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
	if (TREE_CODE (args[1]) == INTEGER_CST
	    && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
	  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
					  RSHIFT_EXPR, args[0], args[1]);
	break;

      /* fmulx differs from an ordinary multiplication only for zero
	 times infinity, where the result is 2.0 with the sign being the
	 XOR of the operand signs.  Fold to that constant or to MULT_EXPR
	 when the constant operand(s) allow it.  */
      BUILTIN_GPF (BINOP, fmulx, 0, ALL)
	{
	  gcc_assert (nargs == 2);
	  bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
	  bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
	  if (a0_cst_p || a1_cst_p)
	    {
	      if (a0_cst_p && a1_cst_p)
		{
		  tree t0 = TREE_TYPE (args[0]);
		  real_value a0 = (TREE_REAL_CST (args[0]));
		  real_value a1 = (TREE_REAL_CST (args[1]));
		  /* Canonicalize so that a zero operand, if any, is A0.  */
		  if (real_equal (&a1, &dconst0))
		    std::swap (a0, a1);
		  /* According to real_equal (), +0 equals -0.  */
		  if (real_equal (&a0, &dconst0) && real_isinf (&a1))
		    {
		      real_value res = dconst2;
		      res.sign = a0.sign ^ a1.sign;
		      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						      REAL_CST,
						      build_real (t0, res));
		    }
		  else
		    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						    MULT_EXPR,
						    args[0], args[1]);
		}
	      else /* a0_cst_p ^ a1_cst_p.  */
		{
		  /* With one constant operand that is neither zero nor
		     infinity the special case cannot arise.  */
		  real_value const_part = a0_cst_p
		    ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
		  if (!real_equal (&const_part, &dconst0)
		      && !real_isinf (&const_part))
		    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						    MULT_EXPR, args[0],
						    args[1]);
		}
	    }
	  if (new_stmt)
	    {
	      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
	      gimple_set_vdef (new_stmt, gimple_vdef (stmt));
	    }
	  break;
	}

      /* A lane check that provably passes is replaced by a nop, after
	 detaching the virtual definition and releasing the defs of the
	 call.  */
      case AARCH64_SIMD_BUILTIN_LANE_CHECK:
	if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
	  {
	    unlink_stmt_vdef (stmt);
	    release_defs (stmt);
	    new_stmt = gimple_build_nop ();
	  }
	break;
      default:
	break;
    }
  return new_stmt;
}

/* Fill in *HOLD, *CLEAR and *UPDATE with expression trees that operate
   on the FPCR and FPSR registers through the get/set FPCR/FPSR builtins:
   *HOLD combines the loads of both registers with the installation of
   masked copies, *CLEAR installs the masked FPSR value again, and
   *UPDATE re-reads FPSR, restores the saved FPSR value and calls
   __atomic_feraiseexcept on the value that was re-read.  */
void
aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  const unsigned AARCH64_FE_INVALID = 1;
  const unsigned AARCH64_FE_DIVBYZERO = 2;
  const unsigned AARCH64_FE_OVERFLOW = 4;
  const unsigned AARCH64_FE_UNDERFLOW = 8;
  const unsigned AARCH64_FE_INEXACT = 16;
  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
							| AARCH64_FE_DIVBYZERO
							| AARCH64_FE_OVERFLOW
							| AARCH64_FE_UNDERFLOW
							| AARCH64_FE_INEXACT);
  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;

  /* Generate the equivalent of:
       unsigned int fenv_cr;
       fenv_cr = __builtin_aarch64_get_fpcr ();

       unsigned int fenv_sr;
       fenv_sr = __builtin_aarch64_get_fpsr ();

       Now set all exceptions to non-stop
       unsigned int mask_cr
		= ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
       unsigned int masked_cr;
       masked_cr = fenv_cr & mask_cr;

       And clear all exception flags
       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
       unsigned int masked_sr;
       masked_sr = fenv_sr & mask_sr;

       __builtin_aarch64_set_fpcr (masked_cr);
       __builtin_aarch64_set_fpsr (masked_sr);  */

  /* Temporaries holding the saved register contents.  */
  tree saved_cr = create_tmp_var_raw (unsigned_type_node);
  tree saved_sr = create_tmp_var_raw (unsigned_type_node);

  /* Decls of the register access builtins.  */
  tree get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
  tree set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
  tree get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
  tree set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];

  /* In FPCR the exception bits sit AARCH64_FE_EXCEPT_SHIFT bits above
     their position in FPSR.  */
  const unsigned HOST_WIDE_INT shifted_except
    = AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT;
  tree mask_cr = build_int_cst (unsigned_type_node, ~shifted_except);
  tree mask_sr = build_int_cst (unsigned_type_node,
				~(AARCH64_FE_ALL_EXCEPT));

  tree load_cr = build4 (TARGET_EXPR, unsigned_type_node,
			 saved_cr, build_call_expr (get_fpcr, 0),
			 NULL_TREE, NULL_TREE);
  tree load_sr = build4 (TARGET_EXPR, unsigned_type_node,
			 saved_sr, build_call_expr (get_fpsr, 0),
			 NULL_TREE, NULL_TREE);

  tree masked_cr = build2 (BIT_AND_EXPR, unsigned_type_node,
			   saved_cr, mask_cr);
  tree masked_sr = build2 (BIT_AND_EXPR, unsigned_type_node,
			   saved_sr, mask_sr);

  tree install_cr = build_call_expr (set_fpcr, 1, masked_cr);
  tree install_sr = build_call_expr (set_fpsr, 1, masked_sr);

  tree install_both = build2 (COMPOUND_EXPR, void_type_node,
			      install_cr, install_sr);
  tree masked_both = build2 (COMPOUND_EXPR, void_type_node,
			     masked_cr, masked_sr);
  tree load_both = build2 (COMPOUND_EXPR, void_type_node,
			   load_cr, load_sr);

  tree masked_and_load = build2 (COMPOUND_EXPR, void_type_node,
				 masked_both, load_both);
  *hold = build2 (COMPOUND_EXPR, void_type_node,
		  masked_and_load, install_both);

  /* Store the masked FPSR value to clear the exceptions:
     __builtin_aarch64_set_fpsr (masked_sr);  */

  *clear = build_call_expr (set_fpsr, 1, masked_sr);

  /* Generate the equivalent of:
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_aarch64_get_fpsr ();

       __builtin_aarch64_set_fpsr (fenv_sr);

       __atomic_feraiseexcept (new_fenv_var);  */

  tree raised_sr = create_tmp_var_raw (unsigned_type_node);
  tree reload_sr = build4 (TARGET_EXPR, unsigned_type_node,
			   raised_sr, build_call_expr (get_fpsr, 0),
			   NULL_TREE, NULL_TREE);
  tree restore_sr = build_call_expr (set_fpsr, 1, saved_sr);
  tree feraiseexcept_decl
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree raise_call
    = build_call_expr (feraiseexcept_decl, 1,
		       fold_convert (integer_type_node, raised_sr));
  tree reload_and_restore = build2 (COMPOUND_EXPR, void_type_node,
				    reload_sr, restore_sr);
  *update = build2 (COMPOUND_EXPR, void_type_node,
		    reload_and_restore, raise_call);
}

/* Resolve overloaded MEMTAG built-in functions.  */

/* Yield the builtin subcode of function decl F: its machine-dependent
   function code shifted right by AARCH64_BUILTIN_SHIFT.  */
#define AARCH64_BUILTIN_SUBCODE(F) \
  (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)

/* Resolve a call at LOC to the overloaded MEMTAG built-in FNDECL, given
   the actual arguments in PASS_PARAMS (a vec<tree, va_gc> *, possibly
   null).

   The outcome is recorded by rewriting TREE_TYPE (FNDECL): when the
   argument count matches and the pointer argument(s) are usable, the
   type becomes a function type built from the actual pointer type(s);
   otherwise it is reset to the builtin's initial type.  A warning is
   given for pointer arguments whose mode is not DImode.  The function
   itself always returns NULL_TREE.  */
static tree
aarch64_resolve_overloaded_memtag (location_t loc,
				   tree fndecl, void *pass_params)
{
  vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
  unsigned param_num = params ? params->length () : 0;
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
  tree inittype = aarch64_memtag_builtin_data[
		    fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
  /* The argument type list has one trailing entry that is not a real
     parameter, hence the "- 1".  */
  unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;

  if (param_num != arg_num)
    {
      TREE_TYPE (fndecl) = inittype;
      return NULL_TREE;
    }
  tree retype = NULL;

  if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
    {
      tree t0 = TREE_TYPE ((*params)[0]);
      tree t1 = TREE_TYPE ((*params)[1]);

      /* Fall back to a generic pointer for erroneous or non-pointer
	 arguments.  */
      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
	t0 = ptr_type_node;
      if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
	t1 = ptr_type_node;

      /* Report the size of the pointer type that was tested.  The
	 arguments are arbitrary expressions, not necessarily decls, so
	 DECL_SIZE must not be applied to them.  */
      if (TYPE_MODE (t0) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
		    (int) tree_to_shwi (TYPE_SIZE (t0)));

      if (TYPE_MODE (t1) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
		    (int) tree_to_shwi (TYPE_SIZE (t1)));

      retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
    }
  else
    {
      tree t0 = TREE_TYPE ((*params)[0]);

      /* The remaining builtins need a genuine pointer as their first
	 argument.  */
      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
	{
	  TREE_TYPE (fndecl) = inittype;
	  return NULL_TREE;
	}

      /* As above: use the size of the pointer type, not DECL_SIZE of
	 the argument expression.  */
      if (TYPE_MODE (t0) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
		    (int) tree_to_shwi (TYPE_SIZE (t0)));

      switch (fcode)
	{
	case AARCH64_MEMTAG_BUILTIN_IRG:
	  retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_GMI:
	  retype = build_function_type_list (uint64_type_node, t0,
					     uint64_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_INC_TAG:
	  retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_SET_TAG:
	  retype = build_function_type_list (void_type_node, t0, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_GET_TAG:
	  retype = build_function_type_list (t0, t0, NULL);
	  break;
	default:
	  /* Unknown subcode: leave FNDECL untouched.  */
	  return NULL_TREE;
	}
    }

  if (!retype || retype == error_mark_node)
    TREE_TYPE (fndecl) = inittype;
  else
    TREE_TYPE (fndecl) = retype;

  return NULL_TREE;
}

/* Called at aarch64_resolve_overloaded_builtin in aarch64-c.cc.
   Dispatch the overloaded builtin FUNCTION, called at LOC with the
   arguments in PASS_PARAMS, to the MEMTAG resolver when its subcode lies
   in the MEMTAG range; return NULL_TREE for every other builtin.  */
tree
aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
					    void *pass_params)
{
  const unsigned int subcode = AARCH64_BUILTIN_SUBCODE (function);

  /* Only the MEMTAG builtins are overloaded here.  */
  if (subcode < AARCH64_MEMTAG_BUILTIN_START
      || subcode > AARCH64_MEMTAG_BUILTIN_END)
    return NULL_TREE;

  return aarch64_resolve_overloaded_memtag (loc, function, pass_params);
}

#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef CF0
#undef CF1
#undef CF2
#undef CF3
#undef CF4
#undef CF10
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10
#undef VAR11

#include "gt-aarch64-builtins.h"