diff --git a/gas/ChangeLog b/gas/ChangeLog index 325d9df59c..462842b97b 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,150 @@ +2006-04-26 Julian Brown + + * config/tc-arm.c (limits.h): Include. + (fpu_arch_vfp_v3, fpu_vfp_ext_v3, fpu_neon_ext_v1) + (fpu_vfp_v3_or_neon_ext): Declare constants. + (neon_el_type): New enumeration of types for Neon vector elements. + (neon_type_el): New struct. Define type and size of a vector element. + (NEON_MAX_TYPE_ELS): Define constant. The maximum number of types per + instruction. + (neon_type): Define struct. The type of an instruction. + (arm_it): Add 'vectype' for the current instruction. + (isscalar, immisalign, regisimm, isquad): New predicates for operands. + (vfp_sp_reg_pos): Rename to... + (vfp_reg_pos): ...this, and add VFP_REG_Dd, VFP_REG_Dm, VFP_REG_Dn + tags. + (arm_reg_type): Add REG_TYPE_NQ (Neon Q register) and REG_TYPE_NDQ + (Neon D or Q register). + (reg_expected_msgs): Sync with above. Allow VFD to mean VFP or Neon D + register. + (GE_OPT_PREFIX_BIG): Define constant, for use in... + (my_get_expression): Allow above constant as argument to accept + 64-bit constants with optional prefix. + (arm_reg_parse): Add extra argument to return the specific type of + register in when either a D or Q register (REG_TYPE_NDQ) is + requested. Can be NULL. + (parse_scalar): New function. Parse Neon scalar (vector reg and index). + (parse_reg_list): Update for new arm_reg_parse args. + (parse_vfp_reg_list): Allow parsing of Neon D/Q register lists. + (parse_neon_el_struct_list): New function. Parse element/structure + register lists for VLD/VST instructions. + (s_arm_unwind_save_vfp): Update for new parse_vfp_reg_list args. + (s_arm_unwind_save_mmxwr): Likewise. + (s_arm_unwind_save_mmxwcg): Likewise. + (s_arm_unwind_movsp): Likewise. + (s_arm_unwind_setfp): Likewise. + (parse_big_immediate): New function. Parse an immediate, which may be + 64 bits wide. Put results in inst.operands[i]. + (parse_shift): Update for new arm_reg_parse args. + (parse_address): Likewise. Add parsing of alignment specifiers. + (parse_neon_mov): Parse the operands of a VMOV instruction. + (operand_parse_code): Add OP_RND, OP_RNQ, OP_RNDQ, OP_RNSC, OP_NRDLST, + OP_NSTRLST, OP_NILO, OP_RNDQ_I0, OP_RR_RNSC, OP_RNDQ_RNSC, OP_RND_RNSC, + OP_VMOV, OP_RNDQ_IMVNb, OP_RNDQ_I63b, OP_I0, OP_I16z, OP_I32z, OP_I64, + OP_I64z, OP_oI32b, OP_oRND, OP_oRNQ, OP_oRNDQ. + (parse_operands): Handle new codes above. + (encode_arm_vfp_sp_reg): Rename to... + (encode_arm_vfp_reg): ...this. Handle D regs (0-31) too. Complain if + selected VFP version only supports D0-D15. + (do_vfp_sp_monadic, do_vfp_sp_dyadic, do_vfp_sp_compare_z) + (do_vfp_dp_sp_cvt, do_vfp_reg_from_sp, do_vfp_reg2_from_sp2) + (do_vfp_sp_from_reg, do_vfp_sp2_from_reg2, do_vfp_sp_ldst) + (do_vfp_dp_ldst, vfp_sp_ldstm, vfp_dp_ldstm): Update for new + encode_arm_vfp_reg name, and allow 32 D regs. + (do_vfp_dp_rd_rm, do_vfp_dp_rn_rd, do_vfp_dp_rd_rn, do_vfp_dp_rd_rn_rm) + (do_vfp_rm_rd_rn): New functions to encode VFP insns allowing 32 D + regs. + (do_vfp_sp_const, do_vfp_dp_const, vfp_conv, do_vfp_sp_conv_16) + (do_vfp_dp_conv_16, do_vfp_sp_conv_32, do_vfp_dp_conv_32): Handle + constant-load and conversion insns introduced with VFPv3. + (neon_tab_entry): New struct. + (NEON_ENC_TAB): Bit patterns for overloaded Neon instructions, and + those which are the targets of pseudo-instructions. + (neon_opc): Enumerate opcodes, use as indices into... + (neon_enc_tab): ...this. Hold data from NEON_ENC_TAB. 
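	(For readers unfamiliar with the X-macro arrangement the three
	entries above describe, here is a minimal, self-contained sketch
	of the same pattern.  The names and bit patterns below
	(DEMO_ENC_TAB, vfoo, vbar, the hex values) are placeholders for
	illustration only, not the real NEON_ENC_TAB contents.)

	#include <stdio.h>

	/* Hypothetical table: each X(name, integer-form, float-form) row is
	   expanded twice, once into an enum of opcode indices and once into
	   the lookup table, mirroring the NEON_ENC_TAB / neon_opc /
	   neon_enc_tab arrangement described above.  */
	#define DEMO_ENC_TAB \
	  X (vfoo, 0x0000100, 0x0100100), \
	  X (vbar, 0x0000200, 0x0100200)

	enum demo_opc
	{
	#define X(OPC, I, F) DEMO_MNEM_##OPC
	  DEMO_ENC_TAB
	#undef X
	};

	static const struct { unsigned integer, flt; } demo_enc_tab[] =
	{
	#define X(OPC, I, F) { (I), (F) }
	  DEMO_ENC_TAB
	#undef X
	};

	int
	main (void)
	{
	  /* Look up the float-form encoding for "vbar" by its enum index.  */
	  printf ("%#x\n", demo_enc_tab[DEMO_MNEM_vbar].flt);
	  return 0;
	}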
+ (NEON_ENC_INTEGER, NEON_ENC_ARMREG, NEON_ENC_POLY, NEON_ENC_FLOAT) + (NEON_ENC_SCALAR, NEON_ENC_IMMED, NEON_ENC_INTERLV, NEON_ENC_LANE) + (NEON_ENC_DUP): Define meaningful helper macros to look up values in + neon_enc_tab. + (neon_shape): Enumerate shapes (permitted register widths, etc.) for + Neon instructions. + (neon_type_mask): New. Compact type representation for type checking. + (N_SU_ALL, N_SU_32, N_SU_16_64, N_SUF_32, N_I_ALL, N_IF_32): Common + permitted type combinations. + (N_IGNORE_TYPE): New macro. + (neon_check_shape): New function. Check an instruction shape for + multiple alternatives. Return the specific shape for the current + instruction. + (neon_modify_type_size): New function. Modify a vector type and size, + depending on the bit mask in argument 1. + (neon_type_promote): New function. Convert a given "key" type (of an + operand) into the correct type for a different operand, based on a bit + mask. + (type_chk_of_el_type): New function. Convert a type and size into the + compact representation used for type checking. + (el_type_of_type_ckh): New function. Reverse of above (only when a + single bit is set in the bit mask). + (modify_types_allowed): New function. Alter a mask of allowed types + based on a bit mask of modifications. + (neon_check_type): New function. Check the type of the current + instruction against the variable argument list. The "key" type of the + instruction is returned. + (neon_dp_fixup): New function. Fill in and modify instruction bits for + a Neon data-processing instruction depending on whether we're in ARM + mode or Thumb-2 mode. + (neon_logbits): New function. + (neon_three_same, neon_two_same, do_neon_dyadic_i_su) + (do_neon_dyadic_i64_su, neon_imm_shift, do_neon_shl_imm) + (do_neon_qshl_imm, neon_cmode_for_logic_imm, neon_bits_same_in_bytes) + (neon_squash_bits, neon_is_quarter_float, neon_qfloat_bits) + (neon_cmode_for_move_imm, neon_write_immbits, neon_invert_size) + (do_neon_logic, do_neon_bitfield, neon_dyadic, do_neon_dyadic_if_su) + (do_neon_dyadic_if_su_d, do_neon_dyadic_if_i, do_neon_dyadic_if_i_d) + (do_neon_addsub_if_i, neon_exchange_operands, neon_compare) + (do_neon_cmp, do_neon_cmp_inv, do_neon_ceq, neon_scalar_for_mul) + (neon_mul_mac, do_neon_mac_maybe_scalar, do_neon_tst, do_neon_mul) + (do_neon_qdmulh, do_neon_fcmp_absolute, do_neon_fcmp_absolute_inv) + (do_neon_step, do_neon_abs_neg, do_neon_sli, do_neon_sri) + (do_neon_qshlu_imm, do_neon_qmovn, do_neon_qmovun) + (do_neon_rshift_sat_narrow, do_neon_rshift_sat_narrow_u, do_neon_movn) + (do_neon_rshift_narrow, do_neon_shll, neon_cvt_flavour, do_neon_cvt) + (neon_move_immediate, do_neon_mvn, neon_mixed_length) + (do_neon_dyadic_long, do_neon_abal, neon_mac_reg_scalar_long) + (do_neon_mac_maybe_scalar_long, do_neon_dyadic_wide, do_neon_vmull) + (do_neon_ext, do_neon_rev, do_neon_dup, do_neon_mov) + (do_neon_rshift_round_imm, do_neon_movl, do_neon_trn, do_neon_zip_uzp) + (do_neon_sat_abs_neg, do_neon_pair_long, do_neon_recip_est) + (do_neon_cls, do_neon_clz, do_neon_cnt, do_neon_swp, do_neon_tbl_tbx) + (do_neon_ldm_stm, do_neon_ldr_str, do_neon_ld_st_interleave) + (neon_alignment_bit, do_neon_ld_st_lane, do_neon_ld_dup) + (do_neon_ldx_stx): New functions. Neon bit encoding and encoding + helpers. + (parse_neon_type): New function. Parse Neon type specifier. + (opcode_lookup): Allow parsing of Neon type specifiers. + (REGNUM2, REGSETH, REGSET2): New macros. + (reg_names): Add new VFPv3 and Neon registers. + (NUF, nUF, NCE, nCE): New macros for opcode table. 
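	(The neon_type_mask / N_SU_* entries above boil down to one bit per
	element type, with groups of permitted types ORed into convenience
	masks so that checking an operand type is a single AND.  A minimal
	sketch of that idea follows; the D_* names are made-up stand-ins,
	not the real N_* masks, and the N_EQK/N_KEY machinery that
	neon_check_type layers on top is omitted.)

	#include <stdio.h>

	/* One bit per element type/size pair, as in the compact type
	   representation described above.  */
	enum demo_type_mask
	{
	  D_S8  = 0x01, D_S16 = 0x02, D_S32 = 0x04,
	  D_U8  = 0x08, D_U16 = 0x10, D_U32 = 0x20,
	  D_F32 = 0x40
	};

	/* Convenience groups, analogous to N_SU_32 / N_SUF_32.  */
	#define D_SU_32  (D_S8 | D_S16 | D_S32 | D_U8 | D_U16 | D_U32)
	#define D_SUF_32 (D_SU_32 | D_F32)

	/* Membership test: is the parsed type one of the allowed set?  */
	static int
	demo_type_ok (unsigned allowed, unsigned parsed)
	{
	  return (allowed & parsed) != 0;
	}

	int
	main (void)
	{
	  printf ("f32 in SUF_32: %d\n", demo_type_ok (D_SUF_32, D_F32)); /* 1 */
	  printf ("f32 in SU_32:  %d\n", demo_type_ok (D_SU_32, D_F32));  /* 0 */
	  return 0;
	}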
+ (insns): More VFP registers allowed in fcpyd, fmdhr, fmdlr, fmrdh, + fmrdl, fabsd, fnegd, fsqrtd, faddd, fsubd, fmuld, fdivd, fmacd, fmscd, + fnmuld, fnmacd, fnmscd, fcmpd, fcmpzd, fcmped, fcmpezd, fmdrr, fmrrd. + Add Neon instructions vaba, vhadd, vrhadd, vhsub, vqadd, vqsub, vrshl, + vqrshl, vshl, vqshl{u}, vand, vbic, vorr, vorn, veor, vbsl, vbit, vbif, + vabd, vmax, vmin, vcge, vcgt, vclt, vcle, vceq, vpmax, vpmin, vmla, + vmls, vpadd, vadd, vsub, vtst, vmul, vqdmulh, vqrdmulh, vacge, vacgt, + vaclt, vacle, vrecps, vrsqrts, vabs, vneg, v{r}shr, v{r}sra, vsli, + vsri, vqshrn, vq{r}shr{u}n, v{r}shrn, vshll, vcvt, vmov, vmvn, vabal, + vabdl, vaddl, vsubl, vmlal, vmlsl, vaddw, vsubw, v{r}addhn, v{r}subhn, + vqdmlal, vqdmlsl, vqdmull, vmull, vext, vrev64, vrev32, vrev16, vdup, + vmovl, v{q}movn, vzip, vuzp, vqabs, vqneg, vpadal, vpaddl, vrecpe, + vrsqrte, vcls, vclz, vcnt, vswp, vtrn, vtbl, vtbx, vldm, vstm, vldr, + vstr, vld[1234], vst[1234], fconst[sd], f[us][lh]to[sd], + fto[us][lh][sd]. + (tc_arm_regname_to_dw2regnum): Update for arm_reg_parse args. + (arm_cpu_option_table): Add Neon and VFPv3 to Cortex-A8. + (arm_option_cpu_value): Add vfp3 and neon. + (aeabi_set_public_attributes): Support VFPv3 and NEON attributes. Fix + VFPv1 attribute. + 2006-04-25 Bob Wilson * config/xtensa-relax.c (widen_spec_list): Use new "WIDE." diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c index 2a54e13492..5d750442dd 100644 --- a/gas/config/tc-arm.c +++ b/gas/config/tc-arm.c @@ -26,6 +26,7 @@ 02110-1301, USA. */ #include +#include #define NO_RELOC 0 #include "as.h" #include "safe-ctype.h" @@ -155,6 +156,8 @@ static const arm_feature_set *mfpu_opt = NULL; static const arm_feature_set fpu_default = FPU_DEFAULT; static const arm_feature_set fpu_arch_vfp_v1 = FPU_ARCH_VFP_V1; static const arm_feature_set fpu_arch_vfp_v2 = FPU_ARCH_VFP_V2; +static const arm_feature_set fpu_arch_vfp_v3 = FPU_ARCH_VFP_V3; +static const arm_feature_set fpu_arch_neon_v1 = FPU_ARCH_NEON_V1; static const arm_feature_set fpu_arch_fpa = FPU_ARCH_FPA; static const arm_feature_set fpu_any_hard = FPU_ANY_HARD; static const arm_feature_set fpu_arch_maverick = FPU_ARCH_MAVERICK; @@ -206,6 +209,10 @@ static const arm_feature_set fpu_vfp_ext_v1xd = ARM_FEATURE (0, FPU_VFP_EXT_V1xD); static const arm_feature_set fpu_vfp_ext_v1 = ARM_FEATURE (0, FPU_VFP_EXT_V1); static const arm_feature_set fpu_vfp_ext_v2 = ARM_FEATURE (0, FPU_VFP_EXT_V2); +static const arm_feature_set fpu_vfp_ext_v3 = ARM_FEATURE (0, FPU_VFP_EXT_V3); +static const arm_feature_set fpu_neon_ext_v1 = ARM_FEATURE (0, FPU_NEON_EXT_V1); +static const arm_feature_set fpu_vfp_v3_or_neon_ext = + ARM_FEATURE (0, FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3); static int mfloat_abi_opt = -1; /* Record user cpu selection for object attributes. */ @@ -256,6 +263,31 @@ static int thumb_mode = 0; static bfd_boolean unified_syntax = FALSE; +enum neon_el_type +{ + NT_untyped, + NT_integer, + NT_float, + NT_poly, + NT_signed, + NT_unsigned, + NT_invtype +}; + +struct neon_type_el +{ + enum neon_el_type type; + unsigned size; +}; + +#define NEON_MAX_TYPE_ELS 4 + +struct neon_type +{ + struct neon_type_el el[NEON_MAX_TYPE_ELS]; + unsigned elems; +}; + struct arm_it { const char * error; @@ -263,6 +295,7 @@ struct arm_it int size; int size_req; int cond; + struct neon_type vectype; /* Set to the opcode if the instruction needs relaxation. Zero if the instruction is not relaxed. */ unsigned long relax; @@ -280,6 +313,12 @@ struct arm_it unsigned present : 1; /* Operand present. 
*/ unsigned isreg : 1; /* Operand was a register. */ unsigned immisreg : 1; /* .imm field is a second register. */ + unsigned isscalar : 1; /* Operand is a (Neon) scalar. */ + unsigned immisalign : 1; /* Immediate is an alignment specifier. */ + /* Note: we abuse "regisimm" to mean "is Neon register" in VMOV + instructions. This allows us to disambiguate ARM <-> vector insns. */ + unsigned regisimm : 1; /* 64-bit immediate, reg forms high 32 bits. */ + unsigned isquad : 1; /* Operand is Neon quad-precision register. */ unsigned hasreloc : 1; /* Operand has relocation suffix. */ unsigned writeback : 1; /* Operand has trailing ! */ unsigned preind : 1; /* Preindexed address. */ @@ -355,9 +394,10 @@ struct reloc_entry bfd_reloc_code_real_type reloc; }; -enum vfp_sp_reg_pos +enum vfp_reg_pos { - VFP_REG_Sd, VFP_REG_Sm, VFP_REG_Sn + VFP_REG_Sd, VFP_REG_Sm, VFP_REG_Sn, + VFP_REG_Dd, VFP_REG_Dm, VFP_REG_Dn }; enum vfp_ldstm_type @@ -375,6 +415,8 @@ enum arm_reg_type REG_TYPE_FN, REG_TYPE_VFS, REG_TYPE_VFD, + REG_TYPE_NQ, + REG_TYPE_NDQ, REG_TYPE_VFC, REG_TYPE_MVF, REG_TYPE_MVD, @@ -405,7 +447,9 @@ const char *const reg_expected_msgs[] = N_("co-processor register expected"), N_("FPA register expected"), N_("VFP single precision register expected"), - N_("VFP double precision register expected"), + N_("VFP/Neon double precision register expected"), + N_("Neon quad precision register expected"), + N_("Neon double or quad precision register expected"), N_("VFP system register expected"), N_("Maverick MVF register expected"), N_("Maverick MVD register expected"), @@ -691,6 +735,9 @@ static int in_my_get_expression = 0; #define GE_NO_PREFIX 0 #define GE_IMM_PREFIX 1 #define GE_OPT_PREFIX 2 +/* This is a bit of a hack. Use an optional prefix, and also allow big (64-bit) + immediates, as can be used in Neon VMVN and VMOV immediate instructions. */ +#define GE_OPT_PREFIX_BIG 3 static int my_get_expression (expressionS * ep, char ** str, int prefix_mode) @@ -700,7 +747,8 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode) /* In unified syntax, all prefixes are optional. */ if (unified_syntax) - prefix_mode = GE_OPT_PREFIX; + prefix_mode = (prefix_mode == GE_OPT_PREFIX_BIG) ? prefix_mode + : GE_OPT_PREFIX; switch (prefix_mode) { @@ -714,6 +762,7 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode) (*str)++; break; case GE_OPT_PREFIX: + case GE_OPT_PREFIX_BIG: if (is_immediate_prefix (**str)) (*str)++; break; @@ -755,11 +804,12 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode) /* Get rid of any bignums now, so that we don't generate an error for which we can't establish a line number later on. Big numbers are never valid in instructions, which is where this routine is always called. */ - if (ep->X_op == O_big - || (ep->X_add_symbol - && (walk_no_bignums (ep->X_add_symbol) - || (ep->X_op_symbol - && walk_no_bignums (ep->X_op_symbol))))) + if (prefix_mode != GE_OPT_PREFIX_BIG + && (ep->X_op == O_big + || (ep->X_add_symbol + && (walk_no_bignums (ep->X_add_symbol) + || (ep->X_op_symbol + && walk_no_bignums (ep->X_op_symbol)))))) { inst.error = _("invalid constant"); *str = input_line_pointer; @@ -940,14 +990,24 @@ arm_reg_parse_multi (char **ccp) } /* As above, but the register must be of type TYPE, and the return - value is the register number or FAIL. */ + value is the register number or FAIL. + If RTYPE is non-zero, return the (possibly restricted) type of the + register (e.g. Neon double or quad reg when either has been requested). 
*/ static int -arm_reg_parse (char **ccp, enum arm_reg_type type) +arm_reg_parse (char **ccp, enum arm_reg_type type, enum arm_reg_type *rtype) { char *start = *ccp; struct reg_entry *reg = arm_reg_parse_multi (ccp); + /* Undo polymorphism for Neon D and Q registers. */ + if (reg && type == REG_TYPE_NDQ + && (reg->type == REG_TYPE_NQ || reg->type == REG_TYPE_VFD)) + type = reg->type; + + if (rtype) + *rtype = type; + if (reg && reg->type == type) return reg->number; @@ -986,6 +1046,46 @@ arm_reg_parse (char **ccp, enum arm_reg_type type) return FAIL; } +/* Parse a Neon scalar. Most of the time when we're parsing a scalar, we don't + have enough information to be able to do a good job bounds-checking. So, we + just do easy checks here, and do further checks later. */ + +static int +parse_scalar (char **ccp, int elsize) +{ + int regno, elno; + char *str = *ccp; + expressionS exp; + + if ((regno = arm_reg_parse (&str, REG_TYPE_VFD, NULL)) == FAIL) + return FAIL; + + if (skip_past_char (&str, '[') == FAIL) + return FAIL; + + my_get_expression (&exp, &str, GE_NO_PREFIX); + if (exp.X_op != O_constant) + { + inst.error = _("constant expression required"); + return FAIL; + } + elno = exp.X_add_number; + + if (elno >= 64 / elsize) + { + inst.error = _("scalar index out of range"); + return FAIL; + } + + if (skip_past_char (&str, ']') == FAIL) + return FAIL; + + /* Parsed scalar successfully. Skip over it. */ + *ccp = str; + + return (regno * 8) + elno; +} + /* Parse an ARM register list. Returns the bitmask, or FAIL. */ static long parse_reg_list (char ** strp) @@ -1009,7 +1109,7 @@ parse_reg_list (char ** strp) { int reg; - if ((reg = arm_reg_parse (&str, REG_TYPE_RN)) == FAIL) + if ((reg = arm_reg_parse (&str, REG_TYPE_RN, NULL)) == FAIL) { inst.error = _(reg_expected_msgs[REG_TYPE_RN]); return FAIL; @@ -1111,53 +1211,101 @@ parse_reg_list (char ** strp) return range; } +/* Types of registers in a list. */ + +enum reg_list_els +{ + REGLIST_VFP_S, + REGLIST_VFP_D, + REGLIST_NEON_D +}; + /* Parse a VFP register list. If the string is invalid return FAIL. Otherwise return the number of registers, and set PBASE to the first - register. Double precision registers are matched if DP is nonzero. */ + register. Parses registers of type ETYPE. + If REGLIST_NEON_D is used, several syntax enhancements are enabled: + - Q registers can be used to specify pairs of D registers + - { } can be omitted from around a singleton register list + FIXME: This is not implemented, as it would require backtracking in + some cases, e.g.: + vtbl.8 d3,d4,d5 + This could be done (the meaning isn't really ambiguous), but doesn't + fit in well with the current parsing framework. + - 32 D registers may be used (also true for VFPv3). */ static int -parse_vfp_reg_list (char **str, unsigned int *pbase, int dp) +parse_vfp_reg_list (char **str, unsigned int *pbase, enum reg_list_els etype) { int base_reg; int new_base; - int regtype; - int max_regs; + enum arm_reg_type regtype = 0; + int max_regs = 0; int count = 0; int warned = 0; unsigned long mask = 0; int i; if (**str != '{') - return FAIL; + { + inst.error = _("expecting {"); + return FAIL; + } (*str)++; - if (dp) - { - regtype = REG_TYPE_VFD; - max_regs = 16; - } - else + switch (etype) { + case REGLIST_VFP_S: regtype = REG_TYPE_VFS; max_regs = 32; + break; + + case REGLIST_VFP_D: + regtype = REG_TYPE_VFD; + /* VFPv3 allows 32 D registers. 
*/ + if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3)) + { + max_regs = 32; + if (thumb_mode) + ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, + fpu_vfp_ext_v3); + else + ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, + fpu_vfp_ext_v3); + } + else + max_regs = 16; + break; + + case REGLIST_NEON_D: + regtype = REG_TYPE_NDQ; + max_regs = 32; + break; } base_reg = max_regs; do { - new_base = arm_reg_parse (str, regtype); + int setmask = 1, addregs = 1; + new_base = arm_reg_parse (str, regtype, ®type); if (new_base == FAIL) { inst.error = gettext (reg_expected_msgs[regtype]); return FAIL; } + /* Note: a value of 2 * n is returned for the register Q. */ + if (regtype == REG_TYPE_NQ) + { + setmask = 3; + addregs = 2; + } + if (new_base < base_reg) base_reg = new_base; - if (mask & (1 << new_base)) + if (mask & (setmask << new_base)) { inst.error = _("invalid register list"); return FAIL; @@ -1169,8 +1317,8 @@ parse_vfp_reg_list (char **str, unsigned int *pbase, int dp) warned = 1; } - mask |= 1 << new_base; - count++; + mask |= setmask << new_base; + count += addregs; if (**str == '-') /* We have the start of a range expression */ { @@ -1178,28 +1326,31 @@ parse_vfp_reg_list (char **str, unsigned int *pbase, int dp) (*str)++; - if ((high_range = arm_reg_parse (str, regtype)) == FAIL) + if ((high_range = arm_reg_parse (str, regtype, NULL)) == FAIL) { inst.error = gettext (reg_expected_msgs[regtype]); return FAIL; } + if (regtype == REG_TYPE_NQ) + high_range = high_range + 1; + if (high_range <= new_base) { inst.error = _("register range not in ascending order"); return FAIL; } - for (new_base++; new_base <= high_range; new_base++) + for (new_base += addregs; new_base <= high_range; new_base += addregs) { - if (mask & (1 << new_base)) + if (mask & (setmask << new_base)) { inst.error = _("invalid register list"); return FAIL; } - mask |= 1 << new_base; - count++; + mask |= setmask << new_base; + count += addregs; } } } @@ -1227,6 +1378,180 @@ parse_vfp_reg_list (char **str, unsigned int *pbase, int dp) return count; } +/* Parse element/structure lists for Neon VLD and VST instructions. + The base register is put in *PBASE. + The lane (or one of the #defined constants below) is placed in bits [3:0] of + the return value. + The register stride (minus one) is put in bit 4 of the return value. + Bits [6:5] encode the list length (minus one). */ + +#define NEON_ALL_LANES 15 +#define NEON_INTERLEAVE_LANES 14 +#define NEON_LANE(X) ((X) & 0xf) +#define NEON_REG_STRIDE(X) (((X) & (1 << 4)) ? 
2 : 1) +#define NEON_REGLIST_LENGTH(X) ((((X) >> 5) & 3) + 1) + +static int +parse_neon_el_struct_list (char **str, unsigned *pbase) +{ + char *ptr = *str; + int base_reg = -1; + int reg_incr = -1; + int count = 0; + int lane = -1; + int leading_brace = 0; + enum arm_reg_type rtype = REG_TYPE_NDQ; + int addregs = 1; + const char *const incr_error = "register stride must be 1 or 2"; + const char *const type_error = "mismatched element/structure types in list"; + + if (skip_past_char (&ptr, '{') == SUCCESS) + leading_brace = 1; + + do + { + int getreg = arm_reg_parse (&ptr, rtype, &rtype); + if (getreg == FAIL) + { + inst.error = _(reg_expected_msgs[rtype]); + return FAIL; + } + + if (base_reg == -1) + { + base_reg = getreg; + if (rtype == REG_TYPE_NQ) + { + reg_incr = 1; + addregs = 2; + } + } + else if (reg_incr == -1) + { + reg_incr = getreg - base_reg; + if (reg_incr < 1 || reg_incr > 2) + { + inst.error = _(incr_error); + return FAIL; + } + } + else if (getreg != base_reg + reg_incr * count) + { + inst.error = _(incr_error); + return FAIL; + } + + /* Handle Dn-Dm or Qn-Qm syntax. Can only be used with non-indexed list + modes. */ + if (ptr[0] == '-') + { + int hireg, dregs = (rtype == REG_TYPE_NQ) ? 2 : 1; + if (lane == -1) + lane = NEON_INTERLEAVE_LANES; + else if (lane != NEON_INTERLEAVE_LANES) + { + inst.error = _(type_error); + return FAIL; + } + if (reg_incr == -1) + reg_incr = 1; + else if (reg_incr != 1) + { + inst.error = _("don't use Rn-Rm syntax with non-unit stride"); + return FAIL; + } + ptr++; + hireg = arm_reg_parse (&ptr, rtype, NULL); + if (hireg == FAIL) + { + inst.error = _(reg_expected_msgs[rtype]); + return FAIL; + } + count += hireg + dregs - getreg; + continue; + } + + /* If we're using Q registers, we can't use [] or [n] syntax. */ + if (rtype == REG_TYPE_NQ) + { + count += 2; + continue; + } + + if (skip_past_char (&ptr, '[') == SUCCESS) + { + if (skip_past_char (&ptr, ']') == SUCCESS) + { + if (lane == -1) + lane = NEON_ALL_LANES; + else if (lane != NEON_ALL_LANES) + { + inst.error = _(type_error); + return FAIL; + } + } + else + { + expressionS exp; + my_get_expression (&exp, &ptr, GE_NO_PREFIX); + if (exp.X_op != O_constant) + { + inst.error = _("constant expression required"); + return FAIL; + } + if (lane == -1) + lane = exp.X_add_number; + else if (lane != exp.X_add_number) + { + inst.error = _(type_error); + return FAIL; + } + + if (skip_past_char (&ptr, ']') == FAIL) + { + inst.error = _("expected ]"); + return FAIL; + } + } + } + else if (lane == -1) + lane = NEON_INTERLEAVE_LANES; + else if (lane != NEON_INTERLEAVE_LANES) + { + inst.error = _(type_error); + return FAIL; + } + count++; + } + while ((count != 1 || leading_brace) && skip_past_comma (&ptr) != FAIL); + + /* No lane set by [x]. We must be interleaving structures. */ + if (lane == -1) + lane = NEON_INTERLEAVE_LANES; + + /* Sanity check. */ + if (lane == -1 || base_reg == -1 || count < 1 || count > 4 + || (count > 1 && reg_incr == -1)) + { + inst.error = _("error parsing element/structure list"); + return FAIL; + } + + if ((count > 1 || leading_brace) && skip_past_char (&ptr, '}') == FAIL) + { + inst.error = _("expected }"); + return FAIL; + } + + if (reg_incr == -1) + reg_incr = 1; + + *pbase = base_reg; + *str = ptr; + + return lane | ((reg_incr - 1) << 4) | ((count - 1) << 5); +} + /* Parse an explicit relocation suffix on an expression. This is either nothing, or a word in parentheses. 
Note that if !OBJ_ELF, arm_reloc_hsh contains no entries, so this function can only @@ -2423,7 +2748,7 @@ s_arm_unwind_save_vfp (void) unsigned int reg; valueT op; - count = parse_vfp_reg_list (&input_line_pointer, ®, 1); + count = parse_vfp_reg_list (&input_line_pointer, ®, REGLIST_VFP_D); if (count == FAIL) { as_bad (_("expected register list")); @@ -2465,7 +2790,7 @@ s_arm_unwind_save_mmxwr (void) do { - reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR); + reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR, NULL); if (reg == FAIL) { @@ -2480,7 +2805,7 @@ s_arm_unwind_save_mmxwr (void) if (*input_line_pointer == '-') { input_line_pointer++; - hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR); + hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR, NULL); if (hi_reg == FAIL) { as_bad (_(reg_expected_msgs[REG_TYPE_MMXWR])); @@ -2597,7 +2922,7 @@ s_arm_unwind_save_mmxwcg (void) do { - reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG); + reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG, NULL); if (reg == FAIL) { @@ -2613,7 +2938,7 @@ s_arm_unwind_save_mmxwcg (void) if (*input_line_pointer == '-') { input_line_pointer++; - hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG); + hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG, NULL); if (hi_reg == FAIL) { as_bad (_(reg_expected_msgs[REG_TYPE_MMXWCG])); @@ -2711,7 +3036,7 @@ s_arm_unwind_movsp (int ignored ATTRIBUTE_UNUSED) int reg; valueT op; - reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN); + reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN, NULL); if (reg == FAIL) { as_bad (_(reg_expected_msgs[REG_TYPE_RN])); @@ -2772,11 +3097,11 @@ s_arm_unwind_setfp (int ignored ATTRIBUTE_UNUSED) int fp_reg; int offset; - fp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN); + fp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN, NULL); if (skip_past_comma (&input_line_pointer) == FAIL) sp_reg = FAIL; else - sp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN); + sp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN, NULL); if (fp_reg == FAIL || sp_reg == FAIL) { @@ -3043,6 +3368,47 @@ parse_immediate (char **str, int *val, int min, int max, return SUCCESS; } +/* Less-generic immediate-value read function with the possibility of loading a + big (64-bit) immediate, as required by Neon VMOV and VMVN immediate + instructions. Puts the result directly in inst.operands[i]. */ + +static int +parse_big_immediate (char **str, int i) +{ + expressionS exp; + char *ptr = *str; + + my_get_expression (&exp, &ptr, GE_OPT_PREFIX_BIG); + + if (exp.X_op == O_constant) + inst.operands[i].imm = exp.X_add_number; + else if (exp.X_op == O_big + && LITTLENUM_NUMBER_OF_BITS * exp.X_add_number > 32 + && LITTLENUM_NUMBER_OF_BITS * exp.X_add_number <= 64) + { + unsigned parts = 32 / LITTLENUM_NUMBER_OF_BITS, j, idx = 0; + /* Bignums have their least significant bits in + generic_bignum[0]. Make sure we put 32 bits in imm and + 32 bits in reg, in a (hopefully) portable way. 
*/ + assert (parts != 0); + inst.operands[i].imm = 0; + for (j = 0; j < parts; j++, idx++) + inst.operands[i].imm |= generic_bignum[idx] + << (LITTLENUM_NUMBER_OF_BITS * j); + inst.operands[i].reg = 0; + for (j = 0; j < parts; j++, idx++) + inst.operands[i].reg |= generic_bignum[idx] + << (LITTLENUM_NUMBER_OF_BITS * j); + inst.operands[i].regisimm = 1; + } + else + return FAIL; + + *str = ptr; + + return SUCCESS; +} + /* Returns the pseudo-register number of an FPA immediate constant, or FAIL if there isn't a valid constant here. */ @@ -3232,7 +3598,7 @@ parse_shift (char **str, int i, enum parse_shift_mode mode) skip_whitespace (p); if (mode == NO_SHIFT_RESTRICT - && (reg = arm_reg_parse (&p, REG_TYPE_RN)) != FAIL) + && (reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) != FAIL) { inst.operands[i].imm = reg; inst.operands[i].immisreg = 1; @@ -3263,7 +3629,7 @@ parse_shifter_operand (char **str, int i) int value; expressionS expr; - if ((value = arm_reg_parse (str, REG_TYPE_RN)) != FAIL) + if ((value = arm_reg_parse (str, REG_TYPE_RN, NULL)) != FAIL) { inst.operands[i].reg = value; inst.operands[i].isreg = 1; @@ -3374,7 +3740,7 @@ parse_address (char **str, int i) return SUCCESS; } - if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) == FAIL) + if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) == FAIL) { inst.error = _(reg_expected_msgs[REG_TYPE_RN]); return FAIL; @@ -3389,7 +3755,7 @@ parse_address (char **str, int i) if (*p == '+') p++; else if (*p == '-') p++, inst.operands[i].negative = 1; - if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) != FAIL) + if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) != FAIL) { inst.operands[i].imm = reg; inst.operands[i].immisreg = 1; @@ -3398,6 +3764,23 @@ parse_address (char **str, int i) if (parse_shift (&p, i, SHIFT_IMMEDIATE) == FAIL) return FAIL; } + else if (skip_past_char (&p, ':') == SUCCESS) + { + /* FIXME: '@' should be used here, but it's filtered out by generic + code before we get to see it here. This may be subject to + change. */ + expressionS exp; + my_get_expression (&exp, &p, GE_NO_PREFIX); + if (exp.X_op != O_constant) + { + inst.error = _("alignment must be constant"); + return FAIL; + } + inst.operands[i].imm = exp.X_add_number << 8; + inst.operands[i].immisalign = 1; + /* Alignments are not pre-indexes. */ + inst.operands[i].preind = 0; + } else { if (inst.operands[i].negative) @@ -3455,9 +3838,14 @@ parse_address (char **str, int i) if (*p == '+') p++; else if (*p == '-') p++, inst.operands[i].negative = 1; - if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) != FAIL) + if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) != FAIL) { - inst.operands[i].imm = reg; + /* We might be using the immediate for alignment already. If we + are, OR the register number into the low-order bits. */ + if (inst.operands[i].immisalign) + inst.operands[i].imm |= reg; + else + inst.operands[i].imm = reg; inst.operands[i].immisreg = 1; if (skip_past_comma (&p) == SUCCESS) @@ -3711,7 +4099,7 @@ parse_tb (char **str) return FAIL; } - if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) == FAIL) + if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) == FAIL) { inst.error = _(reg_expected_msgs[REG_TYPE_RN]); return FAIL; @@ -3724,7 +4112,7 @@ parse_tb (char **str) return FAIL; } - if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) == FAIL) + if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) == FAIL) { inst.error = _(reg_expected_msgs[REG_TYPE_RN]); return FAIL; @@ -3752,6 +4140,158 @@ parse_tb (char **str) return SUCCESS; } +/* Parse the operands of a Neon VMOV instruction. 
See do_neon_mov for more + information on the types the operands can take and how they are encoded. + Note particularly the abuse of ".regisimm" to signify a Neon register. + Up to three operands may be read; this function handles setting the + ".present" field for each operand itself. + Updates STR and WHICH_OPERAND if parsing is successful and returns SUCCESS, + else returns FAIL. */ + +static int +parse_neon_mov (char **str, int *which_operand) +{ + int i = *which_operand, val; + enum arm_reg_type rtype; + char *ptr = *str; + + if ((val = parse_scalar (&ptr, 8)) != FAIL) + { + /* Case 4: VMOV. , . */ + inst.operands[i].reg = val; + inst.operands[i].isscalar = 1; + inst.operands[i++].present = 1; + + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + + if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) == FAIL) + goto wanted_arm; + + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i].present = 1; + } + else if ((val = arm_reg_parse (&ptr, REG_TYPE_NDQ, &rtype)) != FAIL) + { + /* Cases 0, 1, 2, 3, 5 (D only). */ + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i].isquad = (rtype == REG_TYPE_NQ); + inst.operands[i++].present = 1; + + if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) != FAIL) + { + /* Case 5: VMOV , , . */ + inst.operands[i-1].regisimm = 1; + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i++].present = 1; + + if (rtype == REG_TYPE_NQ) + { + inst.error = _("can't use Neon quad register here"); + return FAIL; + } + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) == FAIL) + goto wanted_arm; + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i].present = 1; + } + else if (parse_big_immediate (&ptr, i) == SUCCESS) + { + /* Case 2: VMOV.
, # + Case 3: VMOV.
, # */ + if (!thumb_mode && (inst.instruction & 0xf0000000) != 0xe0000000) + goto bad_cond; + } + else if ((val = arm_reg_parse (&ptr, REG_TYPE_NDQ, &rtype)) != FAIL) + { + /* Case 0: VMOV , + Case 1: VMOV
, */ + if (!thumb_mode && (inst.instruction & 0xf0000000) != 0xe0000000) + goto bad_cond; + + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i].isquad = (rtype == REG_TYPE_NQ); + inst.operands[i].present = 1; + } + else + { + inst.error = _("expected or or operand"); + return FAIL; + } + } + else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) != FAIL) + { + /* Cases 6, 7. */ + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i++].present = 1; + + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + + if ((val = parse_scalar (&ptr, 8)) != FAIL) + { + /* Case 6: VMOV.
, */ + inst.operands[i].reg = val; + inst.operands[i].isscalar = 1; + inst.operands[i].present = 1; + } + else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) != FAIL) + { + /* Case 7: VMOV , , */ + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i++].present = 1; + + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + + if ((val = arm_reg_parse (&ptr, REG_TYPE_VFD, NULL)) == FAIL) + { + inst.error = _(reg_expected_msgs[REG_TYPE_VFD]); + return FAIL; + } + + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i].regisimm = 1; + inst.operands[i].present = 1; + } + } + else + { + inst.error = _("parse error"); + return FAIL; + } + + /* Successfully parsed the operands. Update args. */ + *which_operand = i; + *str = ptr; + return SUCCESS; + + wanted_comma: + inst.error = _("expected comma"); + return FAIL; + + wanted_arm: + inst.error = _(reg_expected_msgs[REG_TYPE_RN]); + return FAIL; + + bad_cond: + inst.error = _("instruction cannot be conditionalized"); + return FAIL; +} + /* Matcher codes for parse_operands. */ enum operand_parse_code { @@ -3765,7 +4305,11 @@ enum operand_parse_code OP_RCN, /* Coprocessor register */ OP_RF, /* FPA register */ OP_RVS, /* VFP single precision register */ - OP_RVD, /* VFP double precision register */ + OP_RVD, /* VFP double precision register (0..15) */ + OP_RND, /* Neon double precision register (0..31) */ + OP_RNQ, /* Neon quad precision register */ + OP_RNDQ, /* Neon double or quad precision register */ + OP_RNSC, /* Neon scalar D[X] */ OP_RVC, /* VFP control register */ OP_RMF, /* Maverick F register */ OP_RMD, /* Maverick D register */ @@ -3781,14 +4325,31 @@ enum operand_parse_code OP_REGLST, /* ARM register list */ OP_VRSLST, /* VFP single-precision register list */ OP_VRDLST, /* VFP double-precision register list */ + OP_NRDLST, /* Neon double-precision register list (d0-d31, qN aliases) */ + OP_NSTRLST, /* Neon element/structure list */ + OP_NILO, /* Neon immediate/logic operands 2 or 2+3. (VBIC, VORR...) */ + OP_RNDQ_I0, /* Neon D or Q reg, or immediate zero. */ + OP_RR_RNSC, /* ARM reg or Neon scalar. */ + OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar. */ + OP_RND_RNSC, /* Neon D reg, or Neon scalar. */ + OP_VMOV, /* Neon VMOV operands. */ + OP_RNDQ_IMVNb,/* Neon D or Q reg, or immediate good for VMVN. */ + OP_RNDQ_I63b, /* Neon D or Q reg, or immediate for shift. */ + + OP_I0, /* immediate zero */ OP_I7, /* immediate value 0 .. 7 */ OP_I15, /* 0 .. 15 */ OP_I16, /* 1 .. 16 */ + OP_I16z, /* 0 .. 16 */ OP_I31, /* 0 .. 31 */ OP_I31w, /* 0 .. 31, optional trailing ! */ OP_I32, /* 1 .. 32 */ + OP_I32z, /* 0 .. 32 */ + OP_I63, /* 0 .. 63 */ OP_I63s, /* -64 .. 63 */ + OP_I64, /* 1 .. 64 */ + OP_I64z, /* 0 .. 64 */ OP_I255, /* 0 .. 255 */ OP_Iffff, /* 0 .. 65535 */ @@ -3818,11 +4379,15 @@ enum operand_parse_code /* Optional operands. */ OP_oI7b, /* immediate, prefix optional, 0 .. 7 */ OP_oI31b, /* 0 .. 31 */ + OP_oI32b, /* 1 .. 32 */ OP_oIffffb, /* 0 .. 65535 */ OP_oI255c, /* curly-brace enclosed, 0 .. 
255 */ OP_oRR, /* ARM register */ OP_oRRnpc, /* ARM register, not the PC */ + OP_oRND, /* Optional Neon double precision register */ + OP_oRNQ, /* Optional Neon quad precision register */ + OP_oRNDQ, /* Optional Neon double or quad precision register */ OP_oSHll, /* LSL immediate */ OP_oSHar, /* ASR immediate */ OP_oSHllar, /* LSL or ASR immediate */ @@ -3843,6 +4408,7 @@ parse_operands (char *str, const unsigned char *pattern) char *backtrack_pos = 0; const char *backtrack_error = 0; int i, val, backtrack_index = 0; + enum arm_reg_type rtype; #define po_char_or_fail(chr) do { \ if (skip_past_char (&str, chr) == FAIL) \ @@ -3850,7 +4416,7 @@ parse_operands (char *str, const unsigned char *pattern) } while (0) #define po_reg_or_fail(regtype) do { \ - val = arm_reg_parse (&str, regtype); \ + val = arm_reg_parse (&str, regtype, &rtype); \ if (val == FAIL) \ { \ inst.error = _(reg_expected_msgs[regtype]); \ @@ -3858,15 +4424,17 @@ parse_operands (char *str, const unsigned char *pattern) } \ inst.operands[i].reg = val; \ inst.operands[i].isreg = 1; \ + inst.operands[i].isquad = (rtype == REG_TYPE_NQ); \ } while (0) -#define po_reg_or_goto(regtype, label) do { \ - val = arm_reg_parse (&str, regtype); \ - if (val == FAIL) \ - goto label; \ - \ - inst.operands[i].reg = val; \ - inst.operands[i].isreg = 1; \ +#define po_reg_or_goto(regtype, label) do { \ + val = arm_reg_parse (&str, regtype, &rtype); \ + if (val == FAIL) \ + goto label; \ + \ + inst.operands[i].reg = val; \ + inst.operands[i].isreg = 1; \ + inst.operands[i].isquad = (rtype == REG_TYPE_NQ); \ } while (0) #define po_imm_or_fail(min, max, popt) do { \ @@ -3875,6 +4443,14 @@ parse_operands (char *str, const unsigned char *pattern) inst.operands[i].imm = val; \ } while (0) +#define po_scalar_or_goto(elsz, label) do { \ + val = parse_scalar (&str, elsz); \ + if (val == FAIL) \ + goto label; \ + inst.operands[i].reg = val; \ + inst.operands[i].isscalar = 1; \ +} while (0) + #define po_misc_or_fail(expr) do { \ if (expr) \ goto failure; \ @@ -3908,6 +4484,8 @@ parse_operands (char *str, const unsigned char *pattern) case OP_RF: po_reg_or_fail (REG_TYPE_FN); break; case OP_RVS: po_reg_or_fail (REG_TYPE_VFS); break; case OP_RVD: po_reg_or_fail (REG_TYPE_VFD); break; + case OP_oRND: + case OP_RND: po_reg_or_fail (REG_TYPE_VFD); break; case OP_RVC: po_reg_or_fail (REG_TYPE_VFC); break; case OP_RMF: po_reg_or_fail (REG_TYPE_MVF); break; case OP_RMD: po_reg_or_fail (REG_TYPE_MVD); break; @@ -3919,6 +4497,104 @@ parse_operands (char *str, const unsigned char *pattern) case OP_RIWC: po_reg_or_fail (REG_TYPE_MMXWC); break; case OP_RIWG: po_reg_or_fail (REG_TYPE_MMXWCG); break; case OP_RXA: po_reg_or_fail (REG_TYPE_XSCALE); break; + case OP_oRNQ: + case OP_RNQ: po_reg_or_fail (REG_TYPE_NQ); break; + case OP_oRNDQ: + case OP_RNDQ: po_reg_or_fail (REG_TYPE_NDQ); break; + + /* Neon scalar. Using an element size of 8 means that some invalid + scalars are accepted here, so deal with those in later code. */ + case OP_RNSC: po_scalar_or_goto (8, failure); break; + + /* WARNING: We can expand to two operands here. This has the potential + to totally confuse the backtracking mechanism! It will be OK at + least as long as we don't try to use optional args as well, + though. */ + case OP_NILO: + { + po_reg_or_goto (REG_TYPE_NDQ, try_imm); + i++; + skip_past_comma (&str); + po_reg_or_goto (REG_TYPE_NDQ, one_reg_only); + break; + one_reg_only: + /* Optional register operand was omitted. 
Unfortunately, it's in + operands[i-1] and we need it to be in inst.operands[i]. Fix that + here (this is a bit grotty). */ + inst.operands[i] = inst.operands[i-1]; + inst.operands[i-1].present = 0; + break; + try_imm: + /* Immediate gets verified properly later, so accept any now. */ + po_imm_or_fail (INT_MIN, INT_MAX, TRUE); + } + break; + + case OP_RNDQ_I0: + { + po_reg_or_goto (REG_TYPE_NDQ, try_imm0); + break; + try_imm0: + po_imm_or_fail (0, 0, TRUE); + } + break; + + case OP_RR_RNSC: + { + po_scalar_or_goto (8, try_rr); + break; + try_rr: + po_reg_or_fail (REG_TYPE_RN); + } + break; + + case OP_RNDQ_RNSC: + { + po_scalar_or_goto (8, try_ndq); + break; + try_ndq: + po_reg_or_fail (REG_TYPE_NDQ); + } + break; + + case OP_RND_RNSC: + { + po_scalar_or_goto (8, try_vfd); + break; + try_vfd: + po_reg_or_fail (REG_TYPE_VFD); + } + break; + + case OP_VMOV: + /* WARNING: parse_neon_mov can move the operand counter, i. If we're + not careful then bad things might happen. */ + po_misc_or_fail (parse_neon_mov (&str, &i) == FAIL); + break; + + case OP_RNDQ_IMVNb: + { + po_reg_or_goto (REG_TYPE_NDQ, try_mvnimm); + break; + try_mvnimm: + /* There's a possibility of getting a 64-bit immediate here, so + we need special handling. */ + if (parse_big_immediate (&str, i) == FAIL) + { + inst.error = _("immediate value is out of range"); + goto failure; + } + } + break; + + case OP_RNDQ_I63b: + { + po_reg_or_goto (REG_TYPE_NDQ, try_shimm); + break; + try_shimm: + po_imm_or_fail (0, 63, TRUE); + } + break; case OP_RRnpcb: po_char_or_fail ('['); @@ -3936,9 +4612,14 @@ parse_operands (char *str, const unsigned char *pattern) case OP_I7: po_imm_or_fail ( 0, 7, FALSE); break; case OP_I15: po_imm_or_fail ( 0, 15, FALSE); break; case OP_I16: po_imm_or_fail ( 1, 16, FALSE); break; + case OP_I16z: po_imm_or_fail ( 0, 16, FALSE); break; case OP_I31: po_imm_or_fail ( 0, 31, FALSE); break; case OP_I32: po_imm_or_fail ( 1, 32, FALSE); break; + case OP_I32z: po_imm_or_fail ( 0, 32, FALSE); break; case OP_I63s: po_imm_or_fail (-64, 63, FALSE); break; + case OP_I63: po_imm_or_fail ( 0, 63, FALSE); break; + case OP_I64: po_imm_or_fail ( 1, 64, FALSE); break; + case OP_I64z: po_imm_or_fail ( 0, 64, FALSE); break; case OP_I255: po_imm_or_fail ( 0, 255, FALSE); break; case OP_Iffff: po_imm_or_fail ( 0, 0xffff, FALSE); break; @@ -3948,6 +4629,7 @@ parse_operands (char *str, const unsigned char *pattern) case OP_I15b: po_imm_or_fail ( 0, 15, TRUE); break; case OP_oI31b: case OP_I31b: po_imm_or_fail ( 0, 31, TRUE); break; + case OP_oI32b: po_imm_or_fail ( 1, 32, TRUE); break; case OP_oIffffb: po_imm_or_fail ( 0, 0xffff, TRUE); break; /* Immediate variants */ @@ -4066,13 +4748,22 @@ parse_operands (char *str, const unsigned char *pattern) break; case OP_VRSLST: - val = parse_vfp_reg_list (&str, &inst.operands[i].reg, 0); + val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_S); break; case OP_VRDLST: - val = parse_vfp_reg_list (&str, &inst.operands[i].reg, 1); + val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_D); break; + case OP_NRDLST: + val = parse_vfp_reg_list (&str, &inst.operands[i].reg, + REGLIST_NEON_D); + break; + + case OP_NSTRLST: + val = parse_neon_el_struct_list (&str, &inst.operands[i].reg); + break; + /* Addressing modes */ case OP_ADDR: po_misc_or_fail (parse_address (&str, i)); @@ -4121,6 +4812,8 @@ parse_operands (char *str, const unsigned char *pattern) case OP_REGLST: case OP_VRSLST: case OP_VRDLST: + case OP_NRDLST: + case OP_NSTRLST: if (val == FAIL) goto failure; 
inst.operands[i].imm = val; @@ -4178,6 +4871,7 @@ parse_operands (char *str, const unsigned char *pattern) #undef po_reg_or_fail #undef po_reg_or_goto #undef po_imm_or_fail +#undef po_scalar_or_fail /* Shorthand macro for instruction encoding functions issuing errors. */ #define constraint(expr, err) do { \ @@ -4236,11 +4930,30 @@ encode_thumb32_immediate (unsigned int val) return FAIL; } -/* Encode a VFP SP register number into inst.instruction. */ +/* Encode a VFP SP or DP register number into inst.instruction. */ static void -encode_arm_vfp_sp_reg (int reg, enum vfp_sp_reg_pos pos) +encode_arm_vfp_reg (int reg, enum vfp_reg_pos pos) { + if ((pos == VFP_REG_Dd || pos == VFP_REG_Dn || pos == VFP_REG_Dm) + && reg > 15) + { + if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3)) + { + if (thumb_mode) + ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, + fpu_vfp_ext_v3); + else + ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, + fpu_vfp_ext_v3); + } + else + { + inst.error = _("D register out of range for selected VFP version"); + return; + } + } + switch (pos) { case VFP_REG_Sd: @@ -4255,6 +4968,18 @@ encode_arm_vfp_sp_reg (int reg, enum vfp_sp_reg_pos pos) inst.instruction |= ((reg >> 1) << 0) | ((reg & 1) << 5); break; + case VFP_REG_Dd: + inst.instruction |= ((reg & 15) << 12) | ((reg >> 4) << 22); + break; + + case VFP_REG_Dn: + inst.instruction |= ((reg & 15) << 16) | ((reg >> 4) << 7); + break; + + case VFP_REG_Dm: + inst.instruction |= (reg & 15) | ((reg >> 4) << 5); + break; + default: abort (); } @@ -5542,43 +6267,43 @@ do_sxth (void) static void do_vfp_sp_monadic (void) { - encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd); - encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sm); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm); } static void do_vfp_sp_dyadic (void) { - encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd); - encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sn); - encode_arm_vfp_sp_reg (inst.operands[2].reg, VFP_REG_Sm); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sn); + encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Sm); } static void do_vfp_sp_compare_z (void) { - encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); } static void do_vfp_dp_sp_cvt (void) { - inst.instruction |= inst.operands[0].reg << 12; - encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sm); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm); } static void do_vfp_sp_dp_cvt (void) { - encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd); - inst.instruction |= inst.operands[1].reg; + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dm); } static void do_vfp_reg_from_sp (void) { inst.instruction |= inst.operands[0].reg << 12; - encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sn); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sn); } static void @@ -5588,13 +6313,13 @@ do_vfp_reg2_from_sp2 (void) _("only two consecutive VFP SP registers allowed here")); inst.instruction |= inst.operands[0].reg << 12; inst.instruction |= inst.operands[1].reg << 16; - encode_arm_vfp_sp_reg (inst.operands[2].reg, VFP_REG_Sm); + encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Sm); } static void do_vfp_sp_from_reg (void) { - encode_arm_vfp_sp_reg 
(inst.operands[0].reg, VFP_REG_Sn); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sn); inst.instruction |= inst.operands[1].reg << 12; } @@ -5603,7 +6328,7 @@ do_vfp_sp2_from_reg2 (void) { constraint (inst.operands[0].imm != 2, _("only two consecutive VFP SP registers allowed here")); - encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sm); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sm); inst.instruction |= inst.operands[1].reg << 12; inst.instruction |= inst.operands[2].reg << 16; } @@ -5611,14 +6336,14 @@ do_vfp_sp2_from_reg2 (void) static void do_vfp_sp_ldst (void) { - encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); encode_arm_cp_address (1, FALSE, TRUE, 0); } static void do_vfp_dp_ldst (void) { - inst.instruction |= inst.operands[0].reg << 12; + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); encode_arm_cp_address (1, FALSE, TRUE, 0); } @@ -5632,7 +6357,7 @@ vfp_sp_ldstm (enum vfp_ldstm_type ldstm_type) constraint (ldstm_type != VFP_LDSTMIA, _("this addressing mode requires base-register writeback")); inst.instruction |= inst.operands[0].reg << 16; - encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sd); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sd); inst.instruction |= inst.operands[1].imm; } @@ -5648,7 +6373,7 @@ vfp_dp_ldstm (enum vfp_ldstm_type ldstm_type) _("this addressing mode requires base-register writeback")); inst.instruction |= inst.operands[0].reg << 16; - inst.instruction |= inst.operands[1].reg << 12; + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd); count = inst.operands[1].imm << 1; if (ldstm_type == VFP_LDSTMIAX || ldstm_type == VFP_LDSTMDBX) @@ -5692,6 +6417,103 @@ do_vfp_xp_ldstmdb (void) { vfp_dp_ldstm (VFP_LDSTMDBX); } + +static void +do_vfp_dp_rd_rm (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dm); +} + +static void +do_vfp_dp_rn_rd (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dn); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd); +} + +static void +do_vfp_dp_rd_rn (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dn); +} + +static void +do_vfp_dp_rd_rn_rm (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dn); + encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dm); +} + +static void +do_vfp_dp_rd (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); +} + +static void +do_vfp_dp_rm_rd_rn (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dm); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd); + encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dn); +} + +/* VFPv3 instructions. 
*/ +static void +do_vfp_sp_const (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); + inst.instruction |= (inst.operands[1].imm & 15) << 16; + inst.instruction |= (inst.operands[1].imm >> 4); +} + +static void +do_vfp_dp_const (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); + inst.instruction |= (inst.operands[1].imm & 15) << 16; + inst.instruction |= (inst.operands[1].imm >> 4); +} + +static void +vfp_conv (int srcsize) +{ + unsigned immbits = srcsize - inst.operands[1].imm; + inst.instruction |= (immbits & 1) << 5; + inst.instruction |= (immbits >> 1); +} + +static void +do_vfp_sp_conv_16 (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); + vfp_conv (16); +} + +static void +do_vfp_dp_conv_16 (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); + vfp_conv (16); +} + +static void +do_vfp_sp_conv_32 (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); + vfp_conv (32); +} + +static void +do_vfp_dp_conv_32 (void) +{ + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); + vfp_conv (32); +} + /* FPA instructions. Also in a logical order. */ @@ -7998,6 +8820,2549 @@ do_t_usat16 (void) inst.instruction |= inst.operands[1].imm; inst.instruction |= inst.operands[2].reg << 16; } + +/* Neon instruction encoder helpers. */ + +/* Encodings for the different types for various Neon opcodes. */ + +/* An "invalid" code for the following tables. */ +#define N_INV -1u + +struct neon_tab_entry +{ + unsigned integer; + unsigned float_or_poly; + unsigned scalar_or_imm; +}; + +/* Map overloaded Neon opcodes to their respective encodings. */ +#define NEON_ENC_TAB \ + X(vabd, 0x0000700, 0x1200d00, N_INV), \ + X(vmax, 0x0000600, 0x0000f00, N_INV), \ + X(vmin, 0x0000610, 0x0200f00, N_INV), \ + X(vpadd, 0x0000b10, 0x1000d00, N_INV), \ + X(vpmax, 0x0000a00, 0x1000f00, N_INV), \ + X(vpmin, 0x0000a10, 0x1200f00, N_INV), \ + X(vadd, 0x0000800, 0x0000d00, N_INV), \ + X(vsub, 0x1000800, 0x0200d00, N_INV), \ + X(vceq, 0x1000810, 0x0000e00, 0x1b10100), \ + X(vcge, 0x0000310, 0x1000e00, 0x1b10080), \ + X(vcgt, 0x0000300, 0x1200e00, 0x1b10000), \ + /* Register variants of the following two instructions are encoded as + vcge / vcgt with the operands reversed. */ \ + X(vclt, 0x0000310, 0x1000e00, 0x1b10200), \ + X(vcle, 0x0000300, 0x1200e00, 0x1b10180), \ + X(vmla, 0x0000900, 0x0000d10, 0x0800040), \ + X(vmls, 0x1000900, 0x0200d10, 0x0800440), \ + X(vmul, 0x0000910, 0x1000d10, 0x0800840), \ + X(vmull, 0x0800c00, 0x0800e00, 0x0800a40), /* polynomial not float. */ \ + X(vmlal, 0x0800800, N_INV, 0x0800240), \ + X(vmlsl, 0x0800a00, N_INV, 0x0800640), \ + X(vqdmlal, 0x0800900, N_INV, 0x0800340), \ + X(vqdmlsl, 0x0800b00, N_INV, 0x0800740), \ + X(vqdmull, 0x0800d00, N_INV, 0x0800b40), \ + X(vqdmulh, 0x0000b00, N_INV, 0x0800c40), \ + X(vqrdmulh, 0x1000b00, N_INV, 0x0800d40), \ + X(vshl, 0x0000400, N_INV, 0x0800510), \ + X(vqshl, 0x0000410, N_INV, 0x0800710), \ + X(vand, 0x0000110, N_INV, 0x0800030), \ + X(vbic, 0x0100110, N_INV, 0x0800030), \ + X(veor, 0x1000110, N_INV, N_INV), \ + X(vorn, 0x0300110, N_INV, 0x0800010), \ + X(vorr, 0x0200110, N_INV, 0x0800010), \ + X(vmvn, 0x1b00580, N_INV, 0x0800030), \ + X(vshll, 0x1b20300, N_INV, 0x0800a10), /* max shift, immediate. */ \ + X(vcvt, 0x1b30600, N_INV, 0x0800e10), /* integer, fixed-point. */ \ + X(vdup, 0xe800b10, N_INV, 0x1b00c00), /* arm, scalar. */ \ + X(vld1, 0x0200000, 0x0a00000, 0x0a00c00), /* interlv, lane, dup. 
*/ \ + X(vst1, 0x0000000, 0x0800000, N_INV), \ + X(vld2, 0x0200100, 0x0a00100, 0x0a00d00), \ + X(vst2, 0x0000100, 0x0800100, N_INV), \ + X(vld3, 0x0200200, 0x0a00200, 0x0a00e00), \ + X(vst3, 0x0000200, 0x0800200, N_INV), \ + X(vld4, 0x0200300, 0x0a00300, 0x0a00f00), \ + X(vst4, 0x0000300, 0x0800300, N_INV), \ + X(vmovn, 0x1b20200, N_INV, N_INV), \ + X(vtrn, 0x1b20080, N_INV, N_INV), \ + X(vqmovn, 0x1b20200, N_INV, N_INV), \ + X(vqmovun, 0x1b20240, N_INV, N_INV) + +enum neon_opc +{ +#define X(OPC,I,F,S) N_MNEM_##OPC +NEON_ENC_TAB +#undef X +}; + +static const struct neon_tab_entry neon_enc_tab[] = +{ +#define X(OPC,I,F,S) { (I), (F), (S) } +NEON_ENC_TAB +#undef X +}; + +#define NEON_ENC_INTEGER(X) (neon_enc_tab[(X) & 0x0fffffff].integer) +#define NEON_ENC_ARMREG(X) (neon_enc_tab[(X) & 0x0fffffff].integer) +#define NEON_ENC_POLY(X) (neon_enc_tab[(X) & 0x0fffffff].float_or_poly) +#define NEON_ENC_FLOAT(X) (neon_enc_tab[(X) & 0x0fffffff].float_or_poly) +#define NEON_ENC_SCALAR(X) (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm) +#define NEON_ENC_IMMED(X) (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm) +#define NEON_ENC_INTERLV(X) (neon_enc_tab[(X) & 0x0fffffff].integer) +#define NEON_ENC_LANE(X) (neon_enc_tab[(X) & 0x0fffffff].float_or_poly) +#define NEON_ENC_DUP(X) (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm) + +/* Shapes for instruction operands. Some (e.g. NS_DDD_QQQ) represent multiple + shapes which an instruction can accept. The following mnemonic characters + are used in the tag names for this enumeration: + + D - Neon D register + Q - Neon Q register + I - Immediate + S - Scalar + R - ARM register + L - D register list +*/ + +enum neon_shape +{ + NS_DDD_QQQ, + NS_DDD, + NS_QQQ, + NS_DDI_QQI, + NS_DDI, + NS_QQI, + NS_DDS_QQS, + NS_DDS, + NS_QQS, + NS_DD_QQ, + NS_DD, + NS_QQ, + NS_DS_QS, + NS_DS, + NS_QS, + NS_DR_QR, + NS_DR, + NS_QR, + NS_DI_QI, + NS_DI, + NS_QI, + NS_DLD, + NS_DQ, + NS_QD, + NS_DQI, + NS_QDI, + NS_QDD, + NS_QDS, + NS_QQD, + NS_DQQ, + NS_DDDI_QQQI, + NS_DDDI, + NS_QQQI, + NS_IGNORE +}; + +/* Bit masks used in type checking given instructions. + 'N_EQK' means the type must be the same as (or based on in some way) the key + type, which itself is marked with the 'N_KEY' bit. If the 'N_EQK' bit is + set, various other bits can be set as well in order to modify the meaning of + the type constraint. */ + +enum neon_type_mask +{ + N_S8 = 0x000001, + N_S16 = 0x000002, + N_S32 = 0x000004, + N_S64 = 0x000008, + N_U8 = 0x000010, + N_U16 = 0x000020, + N_U32 = 0x000040, + N_U64 = 0x000080, + N_I8 = 0x000100, + N_I16 = 0x000200, + N_I32 = 0x000400, + N_I64 = 0x000800, + N_8 = 0x001000, + N_16 = 0x002000, + N_32 = 0x004000, + N_64 = 0x008000, + N_P8 = 0x010000, + N_P16 = 0x020000, + N_F32 = 0x040000, + N_KEY = 0x080000, /* key element (main type specifier). */ + N_EQK = 0x100000, /* given operand has the same type & size as the key. */ + N_DBL = 0x000001, /* if N_EQK, this operand is twice the size. */ + N_HLF = 0x000002, /* if N_EQK, this operand is half the size. */ + N_SGN = 0x000004, /* if N_EQK, this operand is forced to be signed. */ + N_UNS = 0x000008, /* if N_EQK, this operand is forced to be unsigned. */ + N_INT = 0x000010, /* if N_EQK, this operand is forced to be integer. */ + N_FLT = 0x000020, /* if N_EQK, this operand is forced to be float. 
*/ + N_UTYP = 0, + N_MAX_NONSPECIAL = N_F32 +}; + +#define N_SU_ALL (N_S8 | N_S16 | N_S32 | N_S64 | N_U8 | N_U16 | N_U32 | N_U64) +#define N_SU_32 (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32) +#define N_SU_16_64 (N_S16 | N_S32 | N_S64 | N_U16 | N_U32 | N_U64) +#define N_SUF_32 (N_SU_32 | N_F32) +#define N_I_ALL (N_I8 | N_I16 | N_I32 | N_I64) +#define N_IF_32 (N_I8 | N_I16 | N_I32 | N_F32) + +/* Pass this as the first type argument to neon_check_type to ignore types + altogether. */ +#define N_IGNORE_TYPE (N_KEY | N_EQK) + +/* Check the shape of a Neon instruction (sizes of registers). Returns the more + specific shape when there are two alternatives. For non-polymorphic shapes, + checking is done during operand parsing, so is not implemented here. */ + +static enum neon_shape +neon_check_shape (enum neon_shape req) +{ +#define RR(X) (inst.operands[(X)].isreg) +#define RD(X) (inst.operands[(X)].isreg && !inst.operands[(X)].isquad) +#define RQ(X) (inst.operands[(X)].isreg && inst.operands[(X)].isquad) +#define IM(X) (!inst.operands[(X)].isreg && !inst.operands[(X)].isscalar) +#define SC(X) (!inst.operands[(X)].isreg && inst.operands[(X)].isscalar) + + /* Fix missing optional operands. FIXME: we don't know at this point how + many arguments we should have, so this makes the assumption that we have + > 1. This is true of all current Neon opcodes, I think, but may not be + true in the future. */ + if (!inst.operands[1].present) + inst.operands[1] = inst.operands[0]; + + switch (req) + { + case NS_DDD_QQQ: + { + if (RD(0) && RD(1) && RD(2)) + return NS_DDD; + else if (RQ(0) && RQ(1) && RQ(1)) + return NS_QQQ; + else + inst.error = _("expected , , or
, , " + "operands"); + } + break; + + case NS_DDI_QQI: + { + if (RD(0) && RD(1) && IM(2)) + return NS_DDI; + else if (RQ(0) && RQ(1) && IM(2)) + return NS_QQI; + else + inst.error = _("expected , , # or
, , # " + "operands"); + } + break; + + case NS_DDDI_QQQI: + { + if (RD(0) && RD(1) && RD(2) && IM(3)) + return NS_DDDI; + if (RQ(0) && RQ(1) && RQ(2) && IM(3)) + return NS_QQQI; + else + inst.error = _("expected , , , # or " + "
, , , # operands"); + } + break; + + case NS_DDS_QQS: + { + if (RD(0) && RD(1) && SC(2)) + return NS_DDS; + else if (RQ(0) && RQ(1) && SC(2)) + return NS_QQS; + else + inst.error = _("expected , , or
, , " + "operands"); + } + break; + + case NS_DD_QQ: + { + if (RD(0) && RD(1)) + return NS_DD; + else if (RQ(0) && RQ(1)) + return NS_QQ; + else + inst.error = _("expected , or
, operands"); + } + break; + + case NS_DS_QS: + { + if (RD(0) && SC(1)) + return NS_DS; + else if (RQ(0) && SC(1)) + return NS_QS; + else + inst.error = _("expected , or
, operands"); + } + break; + + case NS_DR_QR: + { + if (RD(0) && RR(1)) + return NS_DR; + else if (RQ(0) && RR(1)) + return NS_QR; + else + inst.error = _("expected , or
, operands"); + } + break; + + case NS_DI_QI: + { + if (RD(0) && IM(1)) + return NS_DI; + else if (RQ(0) && IM(1)) + return NS_QI; + else + inst.error = _("expected , # or
, # operands"); + } + break; + + default: + abort (); + } + + return req; +#undef RR +#undef RD +#undef RQ +#undef IM +#undef SC +} + +static void +neon_modify_type_size (unsigned typebits, enum neon_el_type *g_type, + unsigned *g_size) +{ + /* Allow modification to be made to types which are constrained to be + based on the key element, based on bits set alongside N_EQK. */ + if ((typebits & N_EQK) != 0) + { + if ((typebits & N_HLF) != 0) + *g_size /= 2; + else if ((typebits & N_DBL) != 0) + *g_size *= 2; + if ((typebits & N_SGN) != 0) + *g_type = NT_signed; + else if ((typebits & N_UNS) != 0) + *g_type = NT_unsigned; + else if ((typebits & N_INT) != 0) + *g_type = NT_integer; + else if ((typebits & N_FLT) != 0) + *g_type = NT_float; + } +} + +/* Return operand OPNO promoted by bits set in THISARG. KEY should be the "key" + operand type, i.e. the single type specified in a Neon instruction when it + is the only one given. */ + +static struct neon_type_el +neon_type_promote (struct neon_type_el *key, unsigned thisarg) +{ + struct neon_type_el dest = *key; + + assert ((thisarg & N_EQK) != 0); + + neon_modify_type_size (thisarg, &dest.type, &dest.size); + + return dest; +} + +/* Convert Neon type and size into compact bitmask representation. */ + +static enum neon_type_mask +type_chk_of_el_type (enum neon_el_type type, unsigned size) +{ + switch (type) + { + case NT_untyped: + switch (size) + { + case 8: return N_8; + case 16: return N_16; + case 32: return N_32; + case 64: return N_64; + default: ; + } + break; + + case NT_integer: + switch (size) + { + case 8: return N_I8; + case 16: return N_I16; + case 32: return N_I32; + case 64: return N_I64; + default: ; + } + break; + + case NT_float: + if (size == 32) + return N_F32; + break; + + case NT_poly: + switch (size) + { + case 8: return N_P8; + case 16: return N_P16; + default: ; + } + break; + + case NT_signed: + switch (size) + { + case 8: return N_S8; + case 16: return N_S16; + case 32: return N_S32; + case 64: return N_S64; + default: ; + } + break; + + case NT_unsigned: + switch (size) + { + case 8: return N_U8; + case 16: return N_U16; + case 32: return N_U32; + case 64: return N_U64; + default: ; + } + break; + + default: ; + } + + return N_UTYP; +} + +/* Convert compact Neon bitmask type representation to a type and size. Only + handles the case where a single bit is set in the mask. */ + +static void +el_type_of_type_chk (enum neon_el_type *type, unsigned *size, + enum neon_type_mask mask) +{ + if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0) + *size = 8; + if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_P16)) != 0) + *size = 16; + if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0) + *size = 32; + if ((mask & (N_S64 | N_U64 | N_I64 | N_64)) != 0) + *size = 64; + if ((mask & (N_S8 | N_S16 | N_S32 | N_S64)) != 0) + *type = NT_signed; + if ((mask & (N_U8 | N_U16 | N_U32 | N_U64)) != 0) + *type = NT_unsigned; + if ((mask & (N_I8 | N_I16 | N_I32 | N_I64)) != 0) + *type = NT_integer; + if ((mask & (N_8 | N_16 | N_32 | N_64)) != 0) + *type = NT_untyped; + if ((mask & (N_P8 | N_P16)) != 0) + *type = NT_poly; + if ((mask & N_F32) != 0) + *type = NT_float; +} + +/* Modify a bitmask of allowed types. This is only needed for type + relaxation. 
*/ + +static unsigned +modify_types_allowed (unsigned allowed, unsigned mods) +{ + unsigned size; + enum neon_el_type type; + unsigned destmask; + int i; + + destmask = 0; + + for (i = 1; i <= N_MAX_NONSPECIAL; i <<= 1) + { + el_type_of_type_chk (&type, &size, allowed & i); + neon_modify_type_size (mods, &type, &size); + destmask |= type_chk_of_el_type (type, size); + } + + return destmask; +} + +/* Check type and return type classification. + The manual states (paraphrase): If one datatype is given, it indicates the + type given in: + - the second operand, if there is one + - the operand, if there is no second operand + - the result, if there are no operands. + This isn't quite good enough though, so we use a concept of a "key" datatype + which is set on a per-instruction basis, which is the one which matters when + only one data type is written. + Note: this function has side-effects (e.g. filling in missing operands). All + Neon instructions should call it before performing bit encoding. +*/ + +static struct neon_type_el +neon_check_type (unsigned els, enum neon_shape ns, ...) +{ + va_list ap; + unsigned i, pass, key_el = 0; + unsigned types[NEON_MAX_TYPE_ELS]; + enum neon_el_type k_type = NT_invtype; + unsigned k_size = -1u; + struct neon_type_el badtype = {NT_invtype, -1}; + unsigned key_allowed = 0; + + /* Optional registers in Neon instructions are always (not) in operand 1. + Fill in the missing operand here, if it was omitted. */ + if (els > 1 && !inst.operands[1].present) + inst.operands[1] = inst.operands[0]; + + /* Suck up all the varargs. */ + va_start (ap, ns); + for (i = 0; i < els; i++) + { + unsigned thisarg = va_arg (ap, unsigned); + if (thisarg == N_IGNORE_TYPE) + { + va_end (ap); + return badtype; + } + types[i] = thisarg; + if ((thisarg & N_KEY) != 0) + key_el = i; + } + va_end (ap); + + /* Duplicate inst.vectype elements here as necessary. + FIXME: No idea if this is exactly the same as the ARM assembler, + particularly when an insn takes one register and one non-register + operand. */ + if (inst.vectype.elems == 1 && els > 1) + { + unsigned j; + inst.vectype.elems = els; + inst.vectype.el[key_el] = inst.vectype.el[0]; + for (j = 0; j < els; j++) + { + if (j != key_el) + inst.vectype.el[j] = neon_type_promote (&inst.vectype.el[key_el], + types[j]); + } + } + else if (inst.vectype.elems != els) + { + inst.error = _("type specifier has the wrong number of parts"); + return badtype; + } + + for (pass = 0; pass < 2; pass++) + { + for (i = 0; i < els; i++) + { + unsigned thisarg = types[i]; + unsigned types_allowed = ((thisarg & N_EQK) != 0 && pass != 0) + ? modify_types_allowed (key_allowed, thisarg) : thisarg; + enum neon_el_type g_type = inst.vectype.el[i].type; + unsigned g_size = inst.vectype.el[i].size; + + /* Decay more-specific signed & unsigned types to sign-insensitive + integer types if sign-specific variants are unavailable. */ + if ((g_type == NT_signed || g_type == NT_unsigned) + && (types_allowed & N_SU_ALL) == 0) + g_type = NT_integer; + + /* If only untyped args are allowed, decay any more specific types to + them. Some instructions only care about signs for some element + sizes, so handle that properly. 
*/ + if ((g_size == 8 && (types_allowed & N_8) != 0) + || (g_size == 16 && (types_allowed & N_16) != 0) + || (g_size == 32 && (types_allowed & N_32) != 0) + || (g_size == 64 && (types_allowed & N_64) != 0)) + g_type = NT_untyped; + + if (pass == 0) + { + if ((thisarg & N_KEY) != 0) + { + k_type = g_type; + k_size = g_size; + key_allowed = thisarg & ~N_KEY; + } + } + else + { + if ((thisarg & N_EQK) == 0) + { + unsigned given_type = type_chk_of_el_type (g_type, g_size); + + if ((given_type & types_allowed) == 0) + { + inst.error = _("bad type in Neon instruction"); + return badtype; + } + } + else + { + enum neon_el_type mod_k_type = k_type; + unsigned mod_k_size = k_size; + neon_modify_type_size (thisarg, &mod_k_type, &mod_k_size); + if (g_type != mod_k_type || g_size != mod_k_size) + { + inst.error = _("inconsistent types in Neon instruction"); + return badtype; + } + } + } + } + } + + return inst.vectype.el[key_el]; +} + +/* Fix up Neon data-processing instructions, ORing in the correct bits for + ARM mode or Thumb mode and moving the encoded bit 24 to bit 28. */ + +static unsigned +neon_dp_fixup (unsigned i) +{ + if (thumb_mode) + { + /* The U bit is at bit 24 by default. Move to bit 28 in Thumb mode. */ + if (i & (1 << 24)) + i |= 1 << 28; + + i &= ~(1 << 24); + + i |= 0xef000000; + } + else + i |= 0xf2000000; + + return i; +} + +/* Turn a size (8, 16, 32, 64) into the respective bit number minus 3 + (0, 1, 2, 3). */ + +static unsigned +neon_logbits (unsigned x) +{ + return ffs (x) - 4; +} + +#define LOW4(R) ((R) & 0xf) +#define HI1(R) (((R) >> 4) & 1) + +/* Encode insns with bit pattern: + + |28/24|23|22 |21 20|19 16|15 12|11 8|7|6|5|4|3 0| + | U |x |D |size | Rn | Rd |x x x x|N|Q|M|x| Rm | + + SIZE is passed in bits. -1 means size field isn't changed, in case it has a + different meaning for some instruction. */ + +static void +neon_three_same (int first_optional, int isquad, int ubit, int size) +{ + /* FIXME optional argument handling. */ + if (first_optional && !inst.operands[0].present) + inst.operands[0].reg = inst.operands[1].reg; + + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + inst.instruction |= (isquad != 0) << 6; + inst.instruction |= (ubit != 0) << 24; + if (size != -1) + inst.instruction |= neon_logbits (size) << 20; + + inst.instruction = neon_dp_fixup (inst.instruction); +} + +/* Encode instructions of the form: + + |28/24|23|22|21 20|19 18|17 16|15 12|11 7|6|5|4|3 0| + | U |x |D |x x |size |x x | Rd |x x x x x|Q|M|x| Rm | + + Don't write size if SIZE == -1. */ + +static void +neon_two_same (int qbit, int ubit, int size) +{ + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= (qbit != 0) << 6; + inst.instruction |= (ubit != 0) << 24; + + if (size != -1) + inst.instruction |= neon_logbits (size) << 18; + + inst.instruction = neon_dp_fixup (inst.instruction); +} + +/* Neon instruction encoders, in approximate order of appearance. 
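Before the encoders themselves, a small standalone sketch of the two helpers they all lean on. The constants and bit positions are copied from the functions above; the mnemonic "xyz" is hypothetical, and the opcode-specific bits [11:8] that normally come from the opcode table are simply left zero, so this is an illustration rather than a real encoding.

    #include <stdio.h>
    #include <strings.h>     // ffs()

    // Same computation as neon_logbits() above: 8/16/32/64 -> 0/1/2/3.
    static unsigned logbits (unsigned x) { return ffs (x) - 4; }

    int main (void)
    {
      // Hypothetical three-same instruction "xyz.s16 d1, d2, d3":
      // neon_check_type would return {NT_signed, 16}, so U = 0 and the
      // size field is logbits (16) == 1, placed at bits [21:20] just as
      // neon_three_same() does.
      unsigned d = 1, n = 2, m = 3, size = 16, ubit = 0;
      unsigned insn = 0;

      insn |= (d & 0xf) << 12 | ((d >> 4) & 1) << 22;   // Vd, D
      insn |= (n & 0xf) << 16 | ((n >> 4) & 1) << 7;    // Vn, N
      insn |= (m & 0xf)       | ((m >> 4) & 1) << 5;    // Vm, M
      insn |= ubit << 24;
      insn |= logbits (size) << 20;
      insn |= 0xf2000000;       // ARM-mode prefix, as in neon_dp_fixup()

      printf ("0x%08x\n", insn);
      return 0;
    }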
*/ + +static void +do_neon_dyadic_i_su (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_SU_32 | N_KEY); + neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size); +} + +static void +do_neon_dyadic_i64_su (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_SU_ALL | N_KEY); + neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size); +} + +static void +neon_imm_shift (int write_ubit, int uval, int isquad, struct neon_type_el et, + unsigned immbits) +{ + unsigned size = et.size >> 3; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= (isquad != 0) << 6; + inst.instruction |= immbits << 16; + inst.instruction |= (size >> 3) << 7; + inst.instruction |= (size & 0x7) << 19; + if (write_ubit) + inst.instruction |= (uval != 0) << 24; + + inst.instruction = neon_dp_fixup (inst.instruction); +} + +static void +do_neon_shl_imm (void) +{ + if (!inst.operands[2].isreg) + { + enum neon_shape rs = neon_check_shape (NS_DDI_QQI); + struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL); + inst.instruction = NEON_ENC_IMMED (inst.instruction); + neon_imm_shift (FALSE, 0, rs == NS_QQI, et, inst.operands[2].imm); + } + else + { + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN); + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size); + } +} + +static void +do_neon_qshl_imm (void) +{ + if (!inst.operands[2].isreg) + { + enum neon_shape rs = neon_check_shape (NS_DDI_QQI); + struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY); + inst.instruction = NEON_ENC_IMMED (inst.instruction); + neon_imm_shift (TRUE, et.type == NT_unsigned, rs == NS_QQI, et, + inst.operands[2].imm); + } + else + { + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN); + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size); + } +} + +static int +neon_cmode_for_logic_imm (unsigned immediate, unsigned *immbits, int size) +{ + /* Handle .I8 and .I64 as pseudo-instructions. */ + switch (size) + { + case 8: + /* Unfortunately, this will make everything apart from zero out-of-range. + FIXME is this the intended semantics? There doesn't seem much point in + accepting .I8 if so. */ + immediate |= immediate << 8; + size = 16; + break; + case 64: + /* Similarly, anything other than zero will be replicated in bits [63:32], + which probably isn't want we want if we specified .I64. */ + if (immediate != 0) + goto bad_immediate; + size = 32; + break; + default: ; + } + + if (immediate == (immediate & 0x000000ff)) + { + *immbits = immediate; + return (size == 16) ? 0x9 : 0x1; + } + else if (immediate == (immediate & 0x0000ff00)) + { + *immbits = immediate >> 8; + return (size == 16) ? 
0xb : 0x3; + } + else if (immediate == (immediate & 0x00ff0000)) + { + *immbits = immediate >> 16; + return 0x5; + } + else if (immediate == (immediate & 0xff000000)) + { + *immbits = immediate >> 24; + return 0x7; + } + + bad_immediate: + inst.error = _("immediate value out of range"); + return FAIL; +} + +/* True if IMM has form 0bAAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD for bits + A, B, C, D. */ + +static int +neon_bits_same_in_bytes (unsigned imm) +{ + return ((imm & 0x000000ff) == 0 || (imm & 0x000000ff) == 0x000000ff) + && ((imm & 0x0000ff00) == 0 || (imm & 0x0000ff00) == 0x0000ff00) + && ((imm & 0x00ff0000) == 0 || (imm & 0x00ff0000) == 0x00ff0000) + && ((imm & 0xff000000) == 0 || (imm & 0xff000000) == 0xff000000); +} + +/* For immediate of above form, return 0bABCD. */ + +static unsigned +neon_squash_bits (unsigned imm) +{ + return (imm & 0x01) | ((imm & 0x0100) >> 7) | ((imm & 0x010000) >> 14) + | ((imm & 0x01000000) >> 21); +} + +/* Returns 1 if a number has "quarter-precision" float format + 0baBbbbbbc defgh000 00000000 00000000. */ + +static int +neon_is_quarter_float (unsigned imm) +{ + int b = (imm & 0x20000000) != 0; + int bs = (b << 25) | (b << 26) | (b << 27) | (b << 28) | (b << 29) + | ((!b) << 30); + return (imm & 0x81ffffff) == (imm & 0x81f80000) + && ((imm & 0x7e000000) ^ bs) == 0; +} + +/* Compress above representation to 0b...000 abcdefgh. */ + +static unsigned +neon_qfloat_bits (unsigned imm) +{ + return ((imm >> 19) & 0x7f) | (imm >> 24); +} + +/* Returns CMODE. IMMBITS [7:0] is set to bits suitable for inserting into + the instruction. *OP is passed as the initial value of the op field, and + may be set to a different value depending on the constant (i.e. + "MOV I64, 0bAAAAAAAABBBB..." which uses OP = 1 despite being MOV not + MVN). */ + +static int +neon_cmode_for_move_imm (unsigned immlo, unsigned immhi, unsigned *immbits, + int *op, int size) +{ + if (size == 64 && neon_bits_same_in_bytes (immhi) + && neon_bits_same_in_bytes (immlo)) + { + /* Check this one first so we don't have to bother with immhi in later + tests. */ + if (*op == 1) + return FAIL; + *immbits = (neon_squash_bits (immhi) << 4) | neon_squash_bits (immlo); + *op = 1; + return 0xe; + } + else if (immhi != 0) + return FAIL; + else if (immlo == (immlo & 0x000000ff)) + { + /* 64-bit case was already handled. Don't allow MVN with 8-bit + immediate. */ + if ((size != 8 && size != 16 && size != 32) + || (size == 8 && *op == 1)) + return FAIL; + *immbits = immlo; + return (size == 8) ? 0xe : (size == 16) ? 0x8 : 0x0; + } + else if (immlo == (immlo & 0x0000ff00)) + { + if (size != 16 && size != 32) + return FAIL; + *immbits = immlo >> 8; + return (size == 16) ? 
0xa : 0x2; + } + else if (immlo == (immlo & 0x00ff0000)) + { + if (size != 32) + return FAIL; + *immbits = immlo >> 16; + return 0x4; + } + else if (immlo == (immlo & 0xff000000)) + { + if (size != 32) + return FAIL; + *immbits = immlo >> 24; + return 0x6; + } + else if (immlo == ((immlo & 0x0000ff00) | 0x000000ff)) + { + if (size != 32) + return FAIL; + *immbits = (immlo >> 8) & 0xff; + return 0xc; + } + else if (immlo == ((immlo & 0x00ff0000) | 0x0000ffff)) + { + if (size != 32) + return FAIL; + *immbits = (immlo >> 16) & 0xff; + return 0xd; + } + else if (neon_is_quarter_float (immlo)) + { + if (size != 32 || *op == 1) + return FAIL; + *immbits = neon_qfloat_bits (immlo); + return 0xf; + } + + return FAIL; +} + +/* Write immediate bits [7:0] to the following locations: + + |28/24|23 19|18 16|15 4|3 0| + | a |x x x x x|b c d|x x x x x x x x x x x x|e f g h| + + This function is used by VMOV/VMVN/VORR/VBIC. */ + +static void +neon_write_immbits (unsigned immbits) +{ + inst.instruction |= immbits & 0xf; + inst.instruction |= ((immbits >> 4) & 0x7) << 16; + inst.instruction |= ((immbits >> 7) & 0x1) << 24; +} + +/* Invert low-order SIZE bits of XHI:XLO. */ + +static void +neon_invert_size (unsigned *xlo, unsigned *xhi, int size) +{ + unsigned immlo = xlo ? *xlo : 0; + unsigned immhi = xhi ? *xhi : 0; + + switch (size) + { + case 8: + immlo = (~immlo) & 0xff; + break; + + case 16: + immlo = (~immlo) & 0xffff; + break; + + case 64: + immhi = (~immhi) & 0xffffffff; + /* fall through. */ + + case 32: + immlo = (~immlo) & 0xffffffff; + break; + + default: + abort (); + } + + if (xlo) + *xlo = immlo; + + if (xhi) + *xhi = immhi; +} + +static void +do_neon_logic (void) +{ + if (inst.operands[2].present && inst.operands[2].isreg) + { + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + neon_check_type (3, rs, N_IGNORE_TYPE); + /* U bit and size field were set as part of the bitmask. */ + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + neon_three_same (TRUE, rs == NS_QQQ, 0, -1); + } + else + { + enum neon_shape rs = neon_check_shape (NS_DI_QI); + struct neon_type_el et = neon_check_type (1, rs, N_I8 | N_I16 | N_I32 + | N_I64 | N_F32); + enum neon_opc opcode = inst.instruction & 0x0fffffff; + unsigned immbits; + int cmode; + + if (et.type == NT_invtype) + return; + + inst.instruction = NEON_ENC_IMMED (inst.instruction); + + switch (opcode) + { + case N_MNEM_vbic: + cmode = neon_cmode_for_logic_imm (inst.operands[1].imm, &immbits, + et.size); + break; + + case N_MNEM_vorr: + cmode = neon_cmode_for_logic_imm (inst.operands[1].imm, &immbits, + et.size); + break; + + case N_MNEM_vand: + /* Pseudo-instruction for VBIC. */ + immbits = inst.operands[1].imm; + neon_invert_size (&immbits, 0, et.size); + cmode = neon_cmode_for_logic_imm (immbits, &immbits, et.size); + break; + + case N_MNEM_vorn: + /* Pseudo-instruction for VORR. */ + immbits = inst.operands[1].imm; + neon_invert_size (&immbits, 0, et.size); + cmode = neon_cmode_for_logic_imm (immbits, &immbits, et.size); + break; + + default: + abort (); + } + + if (cmode == FAIL) + return; + + inst.instruction |= (rs == NS_QI) << 6; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= cmode << 8; + neon_write_immbits (immbits); + + inst.instruction = neon_dp_fixup (inst.instruction); + } +} + +static void +do_neon_bitfield (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + /* FIXME: Check that no type was given. 
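The interaction between the pseudo-instruction handling in do_neon_logic above and neon_cmode_for_logic_imm is easiest to see on a concrete operand. The following is only a hand trace, assuming the mnemonic is routed to do_neon_logic.

    "vand.i16 d0, #0xff00"  (VAND/VORN have no immediate encoding of their own):
      do_neon_logic sees N_MNEM_vand, inverts the value within the element
      size (neon_invert_size gives 0x00ff) and encodes a VBIC.I16 instead;
      neon_cmode_for_logic_imm (0xff, &immbits, 16) matches the low-byte case
      and returns cmode 0x9 with immbits 0xff;
      neon_write_immbits (0xff) then scatters a:bcd:efgh as bit 24 = 1,
      bits [18:16] = 0x7 and bits [3:0] = 0xf.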
*/ + neon_three_same (FALSE, rs == NS_QQQ, 0, -1); +} + +static void +neon_dyadic (enum neon_el_type ubit_meaning, unsigned types) +{ + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK, types | N_KEY); + if (et.type == NT_float) + { + inst.instruction = NEON_ENC_FLOAT (inst.instruction); + neon_three_same (TRUE, rs == NS_QQQ, 0, -1); + } + else + { + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + neon_three_same (TRUE, rs == NS_QQQ, et.type == ubit_meaning, et.size); + } +} + +static void +do_neon_dyadic_if_su (void) +{ + neon_dyadic (NT_unsigned, N_SUF_32); +} + +static void +do_neon_dyadic_if_su_d (void) +{ + /* This version only allow D registers, but that constraint is enforced during + operand parsing so we don't need to do anything extra here. */ + neon_dyadic (NT_unsigned, N_SUF_32); +} + +static void +do_neon_dyadic_if_i (void) +{ + neon_dyadic (NT_unsigned, N_IF_32); +} + +static void +do_neon_dyadic_if_i_d (void) +{ + neon_dyadic (NT_unsigned, N_IF_32); +} + +static void +do_neon_addsub_if_i (void) +{ + /* The "untyped" case can't happen. Do this to stop the "U" bit being + affected if we specify unsigned args. */ + neon_dyadic (NT_untyped, N_IF_32 | N_I64); +} + +/* Swaps operands 1 and 2. If operand 1 (optional arg) was omitted, we want the + result to be: + V A,B (A is operand 0, B is operand 2) + to mean: + V A,B,A + not: + V A,B,B + so handle that case specially. */ + +static void +neon_exchange_operands (void) +{ + void *scratch = alloca (sizeof (inst.operands[0])); + if (inst.operands[1].present) + { + /* Swap operands[1] and operands[2]. */ + memcpy (scratch, &inst.operands[1], sizeof (inst.operands[0])); + inst.operands[1] = inst.operands[2]; + memcpy (&inst.operands[2], scratch, sizeof (inst.operands[0])); + } + else + { + inst.operands[1] = inst.operands[2]; + inst.operands[2] = inst.operands[0]; + } +} + +static void +neon_compare (unsigned regtypes, unsigned immtypes, int invert) +{ + if (inst.operands[2].isreg) + { + if (invert) + neon_exchange_operands (); + neon_dyadic (NT_unsigned, regtypes); + } + else + { + enum neon_shape rs = neon_check_shape (NS_DDI_QQI); + struct neon_type_el et = neon_check_type (2, rs, N_EQK, immtypes | N_KEY); + + inst.instruction = NEON_ENC_IMMED (inst.instruction); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= (rs == NS_QQI) << 6; + inst.instruction |= (et.type == NT_float) << 10; + inst.instruction |= neon_logbits (et.size) << 18; + + inst.instruction = neon_dp_fixup (inst.instruction); + } +} + +static void +do_neon_cmp (void) +{ + neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, FALSE); +} + +static void +do_neon_cmp_inv (void) +{ + neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, TRUE); +} + +static void +do_neon_ceq (void) +{ + neon_compare (N_IF_32, N_IF_32, FALSE); +} + +/* For multiply instructions, we have the possibility of 16-bit or 32-bit + scalars, which are encoded in 5 bits, M : Rm. + For 16-bit scalars, the register is encoded in Rm[2:0] and the index in + M:Rm[3], and for 32-bit scalars, the register is encoded in Rm[3:0] and the + index in M. 
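A compact way to check the packing just described. The (reg << 3) | index form of the incoming scalar is what neon_scalar_for_mul below unpacks; the register, index and the "vmla" example are arbitrary choices for illustration.

    #include <stdio.h>

    int main (void)
    {
      // Scalar d5[1], as it might appear in a hypothetical
      // "vmla.f32 q0, q1, d5[1]".
      unsigned scalar = (5 << 3) | 1;
      unsigned regno = scalar >> 3, elno = scalar & 7;

      unsigned field32 = regno | (elno << 4);  // 32-bit: reg in Rm[3:0], index in M
      unsigned field16 = regno | (elno << 3);  // 16-bit: reg in Rm[2:0], index in M:Rm[3]

      printf ("32-bit: M=%u Rm=%u\n", (field32 >> 4) & 1, field32 & 0xf);   // M=1 Rm=5
      printf ("16-bit: index=%u Rm=%u\n", (field16 >> 3) & 3, field16 & 7); // index=1 Rm=5
      return 0;
    }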
*/ + +static unsigned +neon_scalar_for_mul (unsigned scalar, unsigned elsize) +{ + unsigned regno = scalar >> 3; + unsigned elno = scalar & 7; + + switch (elsize) + { + case 16: + if (regno > 7 || elno > 3) + goto bad_scalar; + return regno | (elno << 3); + + case 32: + if (regno > 15 || elno > 1) + goto bad_scalar; + return regno | (elno << 4); + + default: + bad_scalar: + as_bad (_("Scalar out of range for multiply instruction")); + } + + return 0; +} + +/* Encode multiply / multiply-accumulate scalar instructions. */ + +static void +neon_mul_mac (struct neon_type_el et, int ubit) +{ + unsigned scalar = neon_scalar_for_mul (inst.operands[2].reg, et.size); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (scalar); + inst.instruction |= HI1 (scalar) << 5; + inst.instruction |= (et.type == NT_float) << 8; + inst.instruction |= neon_logbits (et.size) << 20; + inst.instruction |= (ubit != 0) << 24; + + inst.instruction = neon_dp_fixup (inst.instruction); +} + +static void +do_neon_mac_maybe_scalar (void) +{ + if (inst.operands[2].isscalar) + { + enum neon_shape rs = neon_check_shape (NS_DDS_QQS); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_I16 | N_I32 | N_F32 | N_KEY); + inst.instruction = NEON_ENC_SCALAR (inst.instruction); + neon_mul_mac (et, rs == NS_QQS); + } + else + do_neon_dyadic_if_i (); +} + +static void +do_neon_tst (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_8 | N_16 | N_32 | N_KEY); + neon_three_same (TRUE, rs == NS_QQQ, 0, et.size); +} + +/* VMUL with 3 registers allows the P8 type. The scalar version supports the + same types as the MAC equivalents. The polynomial type for this instruction + is encoded the same as the integer type. */ + +static void +do_neon_mul (void) +{ + if (inst.operands[2].isscalar) + do_neon_mac_maybe_scalar (); + else + neon_dyadic (NT_poly, N_I8 | N_I16 | N_I32 | N_F32 | N_P8); +} + +static void +do_neon_qdmulh (void) +{ + if (inst.operands[2].isscalar) + { + enum neon_shape rs = neon_check_shape (NS_DDS_QQS); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S16 | N_S32 | N_KEY); + inst.instruction = NEON_ENC_SCALAR (inst.instruction); + neon_mul_mac (et, rs == NS_QQS); + } + else + { + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S16 | N_S32 | N_KEY); + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + /* The U bit (rounding) comes from bit mask. */ + neon_three_same (TRUE, rs == NS_QQQ, 0, et.size); + } +} + +static void +do_neon_fcmp_absolute (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY); + /* Size field comes from bit mask. 
*/ + neon_three_same (TRUE, rs == NS_QQQ, 1, -1); +} + +static void +do_neon_fcmp_absolute_inv (void) +{ + neon_exchange_operands (); + do_neon_fcmp_absolute (); +} + +static void +do_neon_step (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDD_QQQ); + neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY); + neon_three_same (TRUE, rs == NS_QQQ, 0, -1); +} + +static void +do_neon_abs_neg (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_F32 | N_KEY); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= (rs == NS_QQ) << 6; + inst.instruction |= (et.type == NT_float) << 10; + inst.instruction |= neon_logbits (et.size) << 18; + + inst.instruction = neon_dp_fixup (inst.instruction); +} + +static void +do_neon_sli (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDI_QQI); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY); + int imm = inst.operands[2].imm; + constraint (imm < 0 || (unsigned)imm >= et.size, + _("immediate out of range for insert")); + neon_imm_shift (FALSE, 0, rs == NS_QQI, et, imm); +} + +static void +do_neon_sri (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDI_QQI); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY); + int imm = inst.operands[2].imm; + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range for insert")); + neon_imm_shift (FALSE, 0, rs == NS_QQI, et, et.size - imm); +} + +static void +do_neon_qshlu_imm (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDI_QQI); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK | N_UNS, N_S8 | N_S16 | N_S32 | N_S64 | N_KEY); + int imm = inst.operands[2].imm; + constraint (imm < 0 || (unsigned)imm >= et.size, + _("immediate out of range for shift")); + /* Only encodes the 'U present' variant of the instruction. + In this case, signed types have OP (bit 8) set to 0. + Unsigned types have OP set to 1. */ + inst.instruction |= (et.type == NT_unsigned) << 8; + /* The rest of the bits are the same as other immediate shifts. */ + neon_imm_shift (FALSE, 0, rs == NS_QQI, et, imm); +} + +static void +do_neon_qmovn (void) +{ + struct neon_type_el et = neon_check_type (2, NS_DQ, + N_EQK | N_HLF, N_SU_16_64 | N_KEY); + /* Saturating move where operands can be signed or unsigned, and the + destination has the same signedness. */ + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + if (et.type == NT_unsigned) + inst.instruction |= 0xc0; + else + inst.instruction |= 0x80; + neon_two_same (0, 1, et.size / 2); +} + +static void +do_neon_qmovun (void) +{ + struct neon_type_el et = neon_check_type (2, NS_DQ, + N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY); + /* Saturating move with unsigned results. Operands must be signed. */ + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + neon_two_same (0, 1, et.size / 2); +} + +static void +do_neon_rshift_sat_narrow (void) +{ + /* FIXME: Types for narrowing. If operands are signed, results can be signed + or unsigned. If operands are unsigned, results must also be unsigned. 
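For the narrowing shifts it is worth tracing one operand set by hand, since the halving of et.size does double duty as range check and encoding input; the mnemonic is assumed to reach this encoder.

    "vqshrn.s32 d0, q1, #8":
      the key type is the .s32 of the Qm source, so et = {NT_signed, 32};
      et.size /= 2 leaves 16, the width of the destination elements;
      #8 passes the 1..et.size check, and neon_imm_shift is handed
      et.size - imm = 8, which together with the halved size makes the
      combined size/shift field at bits [21:16] read 0b011000.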
*/ + struct neon_type_el et = neon_check_type (2, NS_DQI, + N_EQK | N_HLF, N_SU_16_64 | N_KEY); + int imm = inst.operands[2].imm; + /* This gets the bounds check, size encoding and immediate bits calculation + right. */ + et.size /= 2; + + /* VQ{R}SHRN.I
<size> <Dd>, <Qm>, #0 is a synonym for
+     VQMOVN.I<size> <Dd>

, . */ + if (imm == 0) + { + inst.operands[2].present = 0; + inst.instruction = N_MNEM_vqmovn; + do_neon_qmovn (); + return; + } + + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range")); + neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, et.size - imm); +} + +static void +do_neon_rshift_sat_narrow_u (void) +{ + /* FIXME: Types for narrowing. If operands are signed, results can be signed + or unsigned. If operands are unsigned, results must also be unsigned. */ + struct neon_type_el et = neon_check_type (2, NS_DQI, + N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY); + int imm = inst.operands[2].imm; + /* This gets the bounds check, size encoding and immediate bits calculation + right. */ + et.size /= 2; + + /* VQSHRUN.I
<size> <Dd>, <Qm>, #0 is a synonym for
+     VQMOVUN.I<size> <Dd>
, . */ + if (imm == 0) + { + inst.operands[2].present = 0; + inst.instruction = N_MNEM_vqmovun; + do_neon_qmovun (); + return; + } + + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range")); + /* FIXME: The manual is kind of unclear about what value U should have in + VQ{R}SHRUN instructions, but U=0, op=0 definitely encodes VRSHR, so it + must be 1. */ + neon_imm_shift (TRUE, 1, 0, et, et.size - imm); +} + +static void +do_neon_movn (void) +{ + struct neon_type_el et = neon_check_type (2, NS_DQ, + N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY); + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + neon_two_same (0, 1, et.size / 2); +} + +static void +do_neon_rshift_narrow (void) +{ + struct neon_type_el et = neon_check_type (2, NS_DQI, + N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY); + int imm = inst.operands[2].imm; + /* This gets the bounds check, size encoding and immediate bits calculation + right. */ + et.size /= 2; + + /* If immediate is zero then we are a pseudo-instruction for + VMOVN.I
, */ + if (imm == 0) + { + inst.operands[2].present = 0; + inst.instruction = N_MNEM_vmovn; + do_neon_movn (); + return; + } + + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range for narrowing operation")); + neon_imm_shift (FALSE, 0, 0, et, et.size - imm); +} + +static void +do_neon_shll (void) +{ + /* FIXME: Type checking when lengthening. */ + struct neon_type_el et = neon_check_type (2, NS_QDI, + N_EQK | N_DBL, N_I8 | N_I16 | N_I32 | N_KEY); + unsigned imm = inst.operands[2].imm; + + if (imm == et.size) + { + /* Maximum shift variant. */ + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= neon_logbits (et.size) << 18; + + inst.instruction = neon_dp_fixup (inst.instruction); + } + else + { + /* A more-specific type check for non-max versions. */ + et = neon_check_type (2, NS_QDI, + N_EQK | N_DBL, N_SU_32 | N_KEY); + inst.instruction = NEON_ENC_IMMED (inst.instruction); + neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, imm); + } +} + +/* Check the various types for the VCVT instruction, and return the one that + the current instruction is. */ + +static int +neon_cvt_flavour (enum neon_shape rs) +{ +#define CVT_VAR(C,X,Y) \ + et = neon_check_type (2, rs, (X), (Y)); \ + if (et.type != NT_invtype) \ + { \ + inst.error = NULL; \ + return (C); \ + } + struct neon_type_el et; + + CVT_VAR (0, N_S32, N_F32); + CVT_VAR (1, N_U32, N_F32); + CVT_VAR (2, N_F32, N_S32); + CVT_VAR (3, N_F32, N_U32); + + return -1; +#undef CVT_VAR +} + +static void +do_neon_cvt (void) +{ + /* Fixed-point conversion with #0 immediate is encoded as an integer + conversion. */ + if (inst.operands[2].present && inst.operands[2].imm != 0) + { + enum neon_shape rs = neon_check_shape (NS_DDI_QQI); + int flavour = neon_cvt_flavour (rs); + unsigned immbits = 32 - inst.operands[2].imm; + unsigned enctab[] = { 0x0000100, 0x1000100, 0x0, 0x1000000 }; + inst.instruction = NEON_ENC_IMMED (inst.instruction); + if (flavour != -1) + inst.instruction |= enctab[flavour]; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= (rs == NS_QQI) << 6; + inst.instruction |= 1 << 21; + inst.instruction |= immbits << 16; + } + else + { + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + int flavour = neon_cvt_flavour (rs); + unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080 }; + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + if (flavour != -1) + inst.instruction |= enctab[flavour]; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= (rs == NS_QQ) << 6; + inst.instruction |= 2 << 18; + } + inst.instruction = neon_dp_fixup (inst.instruction); +} + +static void +neon_move_immediate (void) +{ + enum neon_shape rs = neon_check_shape (NS_DI_QI); + struct neon_type_el et = neon_check_type (1, rs, + N_I8 | N_I16 | N_I32 | N_I64 | N_F32); + unsigned immlo, immhi = 0, immbits; + int op, cmode; + + /* We start out as an MVN instruction if OP = 1, MOV otherwise. 
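The MOV/MVN flip below is the part that is easiest to get backwards, so here is one hand-worked case (purely illustrative).

    "vmov.i32 d0, #0xffffff00"  (op starts out 0, i.e. MOV):
      the first neon_cmode_for_move_imm call fails -- no MOV cmode can
      represent 0xffffff00;
      neon_invert_size turns the value into 0x000000ff and op flips to 1;
      the second call then succeeds with cmode 0x0 and immbits 0xff,
      so the bits that reach the object file are those of VMVN.I32 d0, #0xff.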
*/ + op = (inst.instruction & (1 << 5)) != 0; + + immlo = inst.operands[1].imm; + if (inst.operands[1].regisimm) + immhi = inst.operands[1].reg; + + constraint (et.size < 32 && (immlo & ~((1 << et.size) - 1)) != 0, + _("immediate has bits set outside the operand size")); + + if ((cmode = neon_cmode_for_move_imm (immlo, immhi, &immbits, &op, + et.size)) == FAIL) + { + /* Invert relevant bits only. */ + neon_invert_size (&immlo, &immhi, et.size); + /* Flip from VMOV/VMVN to VMVN/VMOV. Some immediate types are unavailable + with one or the other; those cases are caught by + neon_cmode_for_move_imm. */ + op = !op; + if ((cmode = neon_cmode_for_move_imm (immlo, immhi, &immbits, &op, + et.size)) == FAIL) + { + inst.error = _("immediate out of range"); + return; + } + } + + inst.instruction &= ~(1 << 5); + inst.instruction |= op << 5; + + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= (rs == NS_QI) << 6; + inst.instruction |= cmode << 8; + + neon_write_immbits (immbits); +} + +static void +do_neon_mvn (void) +{ + if (inst.operands[1].isreg) + { + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= (rs == NS_QQ) << 6; + } + else + { + inst.instruction = NEON_ENC_IMMED (inst.instruction); + neon_move_immediate (); + } + + inst.instruction = neon_dp_fixup (inst.instruction); +} + +/* Encode instructions of form: + + |28/24|23|22|21 20|19 16|15 12|11 8|7|6|5|4|3 0| + | U |x |D |size | Rn | Rd |x x x x|N|x|M|x| Rm | + +*/ + +static void +neon_mixed_length (struct neon_type_el et, unsigned size) +{ + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + inst.instruction |= (et.type == NT_unsigned) << 24; + inst.instruction |= neon_logbits (size) << 20; + + inst.instruction = neon_dp_fixup (inst.instruction); +} + +static void +do_neon_dyadic_long (void) +{ + /* FIXME: Type checking for lengthening op. 
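Pending that FIXME, the existing check already does something useful for the long forms; a quick trace, assuming a VADDL-style mnemonic is routed to this encoder:

    "vaddl.s16 q0, d1, d2":
      neon_check_type (3, NS_QDD, N_EQK | N_DBL, N_EQK, N_SU_32 | N_KEY)
      keys on the .s16 of operand 2; operand 1 must match it and operand 0
      must be its doubled form (.s32 elements in the Q destination);
      et = {NT_signed, 16}, so neon_mixed_length sets U = 0 and the size
      field to neon_logbits (16) == 1.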
*/ + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_DBL, N_EQK, N_SU_32 | N_KEY); + neon_mixed_length (et, et.size); +} + +static void +do_neon_abal (void) +{ + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_INT | N_DBL, N_EQK, N_SU_32 | N_KEY); + neon_mixed_length (et, et.size); +} + +static void +neon_mac_reg_scalar_long (unsigned regtypes, unsigned scalartypes) +{ + if (inst.operands[2].isscalar) + { + struct neon_type_el et = neon_check_type (2, NS_QDS, + N_EQK | N_DBL, regtypes | N_KEY); + inst.instruction = NEON_ENC_SCALAR (inst.instruction); + neon_mul_mac (et, et.type == NT_unsigned); + } + else + { + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_DBL, N_EQK, scalartypes | N_KEY); + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + neon_mixed_length (et, et.size); + } +} + +static void +do_neon_mac_maybe_scalar_long (void) +{ + neon_mac_reg_scalar_long (N_S16 | N_S32 | N_U16 | N_U32, N_SU_32); +} + +static void +do_neon_dyadic_wide (void) +{ + struct neon_type_el et = neon_check_type (3, NS_QQD, + N_EQK | N_DBL, N_EQK | N_DBL, N_SU_32 | N_KEY); + neon_mixed_length (et, et.size); +} + +static void +do_neon_dyadic_narrow (void) +{ + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_DBL, N_EQK, N_I16 | N_I32 | N_I64 | N_KEY); + neon_mixed_length (et, et.size / 2); +} + +static void +do_neon_mul_sat_scalar_long (void) +{ + neon_mac_reg_scalar_long (N_S16 | N_S32, N_S16 | N_S32); +} + +static void +do_neon_vmull (void) +{ + if (inst.operands[2].isscalar) + do_neon_mac_maybe_scalar_long (); + else + { + struct neon_type_el et = neon_check_type (3, NS_QDD, + N_EQK | N_DBL, N_EQK, N_SU_32 | N_P8 | N_KEY); + if (et.type == NT_poly) + inst.instruction = NEON_ENC_POLY (inst.instruction); + else + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + /* For polynomial encoding, size field must be 0b00 and the U bit must be + zero. Should be OK as-is. */ + neon_mixed_length (et, et.size); + } +} + +static void +do_neon_ext (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDDI_QQQI); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY); + unsigned imm = (inst.operands[3].imm * et.size) / 8; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + inst.instruction |= (rs == NS_QQQI) << 6; + inst.instruction |= imm << 8; + + inst.instruction = neon_dp_fixup (inst.instruction); +} + +static void +do_neon_rev (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_8 | N_16 | N_32 | N_KEY); + unsigned op = (inst.instruction >> 7) & 3; + /* N (width of reversed regions) is encoded as part of the bitmask. We + extract it here to check the elements to be reversed are smaller. + Otherwise we'd get a reserved instruction. */ + unsigned elsize = (op == 2) ? 16 : (op == 1) ? 32 : (op == 0) ? 
64 : 0; + assert (elsize != 0); + constraint (et.size >= elsize, + _("elements must be smaller than reversal region")); + neon_two_same (rs == NS_QQ, 1, et.size); +} + +static void +do_neon_dup (void) +{ + if (inst.operands[1].isscalar) + { + enum neon_shape rs = neon_check_shape (NS_DS_QS); + struct neon_type_el et = neon_check_type (1, rs, N_8 | N_16 | N_32); + unsigned sizebits = et.size >> 3; + unsigned dm = inst.operands[1].reg >> 3; + int logsize = neon_logbits (et.size); + unsigned x = (inst.operands[1].reg & 7) << logsize; + inst.instruction = NEON_ENC_SCALAR (inst.instruction); + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (dm); + inst.instruction |= HI1 (dm) << 5; + inst.instruction |= (rs == NS_QS) << 6; + inst.instruction |= x << 17; + inst.instruction |= sizebits << 16; + + inst.instruction = neon_dp_fixup (inst.instruction); + } + else + { + enum neon_shape rs = neon_check_shape (NS_DR_QR); + struct neon_type_el et = neon_check_type (1, rs, N_8 | N_16 | N_32); + unsigned save_cond = inst.instruction & 0xf0000000; + /* Duplicate ARM register to lanes of vector. */ + inst.instruction = NEON_ENC_ARMREG (inst.instruction); + switch (et.size) + { + case 8: inst.instruction |= 0x400000; break; + case 16: inst.instruction |= 0x000020; break; + case 32: inst.instruction |= 0x000000; break; + default: break; + } + inst.instruction |= LOW4 (inst.operands[1].reg) << 12; + inst.instruction |= LOW4 (inst.operands[0].reg) << 16; + inst.instruction |= HI1 (inst.operands[0].reg) << 7; + inst.instruction |= (rs == NS_QR) << 21; + /* The encoding for this instruction is identical for the ARM and Thumb + variants, except for the condition field. */ + if (thumb_mode) + inst.instruction |= 0xe0000000; + else + inst.instruction |= save_cond; + } +} + +/* VMOV has particularly many variations. It can be one of: + 0. VMOV , + 1. VMOV
<Dd>, <Dm>
+   (Register operations, which are VORR with Rm = Rn.)
+     2. VMOV<c><q>.<dt> <Qd>, #<imm>
+     3. VMOV<c><q>.<dt> <Dd>, #<imm>
+   (Immediate loads.)
+     4. VMOV<c><q>.<size> <Dn[x]>, <Rd>
+   (ARM register to scalar.)
+     5. VMOV<c><q> <Dm>, <Rd>, <Rn>
+   (Two ARM registers to vector.)
+     6. VMOV<c><q>.<dt> <Rd>
, + (Scalar to ARM register.) + 7. VMOV , , + (Vector to two ARM registers.) + + We should have just enough information to be able to disambiguate most of + these, apart from "Two ARM registers to vector" and "Vector to two ARM + registers" cases. For these, abuse the .regisimm operand field to signify a + Neon register. + + All the encoded bits are hardcoded by this function. + + FIXME: Some of the checking may be a bit sloppy (in a couple of cases you + can specify a type where it doesn't make sense to, and is ignored). +*/ + +static void +do_neon_mov (void) +{ + int nargs = inst.operands[0].present + inst.operands[1].present + + inst.operands[2].present; + unsigned save_cond = thumb_mode ? 0xe0000000 : inst.instruction & 0xf0000000; + + switch (nargs) + { + case 2: + /* Cases 0, 1, 2, 3, 4, 6. */ + if (inst.operands[1].isscalar) + { + /* Case 6. */ + struct neon_type_el et = neon_check_type (1, NS_IGNORE, + N_S8 | N_S16 | N_U8 | N_U16 | N_32); + unsigned logsize = neon_logbits (et.size); + unsigned dn = inst.operands[1].reg >> 3; + unsigned x = inst.operands[1].reg & 7; + unsigned abcdebits = 0; + + constraint (x >= 64 / et.size, _("scalar index out of range")); + + switch (et.size) + { + case 8: abcdebits = (et.type == NT_signed) ? 0x08 : 0x18; break; + case 16: abcdebits = (et.type == NT_signed) ? 0x01 : 0x11; break; + case 32: abcdebits = 0x00; break; + default: ; + } + + abcdebits |= x << logsize; + inst.instruction = save_cond; + inst.instruction |= 0xe100b10; + inst.instruction |= LOW4 (dn) << 16; + inst.instruction |= HI1 (dn) << 7; + inst.instruction |= inst.operands[0].reg << 12; + inst.instruction |= (abcdebits & 3) << 5; + inst.instruction |= (abcdebits >> 2) << 21; + } + else if (inst.operands[1].isreg) + { + /* Cases 0, 1, 4. */ + if (inst.operands[0].isscalar) + { + /* Case 4. */ + unsigned bcdebits = 0; + struct neon_type_el et = neon_check_type (1, NS_IGNORE, + N_8 | N_16 | N_32); + int logsize = neon_logbits (et.size); + unsigned dn = inst.operands[0].reg >> 3; + unsigned x = inst.operands[0].reg & 7; + + constraint (x >= 64 / et.size, _("scalar index out of range")); + + switch (et.size) + { + case 8: bcdebits = 0x8; break; + case 16: bcdebits = 0x1; break; + case 32: bcdebits = 0x0; break; + default: ; + } + + bcdebits |= x << logsize; + inst.instruction = save_cond; + inst.instruction |= 0xe000b10; + inst.instruction |= LOW4 (dn) << 16; + inst.instruction |= HI1 (dn) << 7; + inst.instruction |= inst.operands[1].reg << 12; + inst.instruction |= (bcdebits & 3) << 5; + inst.instruction |= (bcdebits >> 2) << 21; + } + else + { + /* Cases 0, 1. */ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + /* The architecture manual I have doesn't explicitly state which + value the U bit should have for register->register moves, but + the equivalent VORR instruction has U = 0, so do that. */ + inst.instruction = 0x0200110; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= (rs == NS_QQ) << 6; + + inst.instruction = neon_dp_fixup (inst.instruction); + } + } + else + { + /* Cases 2, 3. */ + inst.instruction = 0x0800010; + neon_move_immediate (); + inst.instruction = neon_dp_fixup (inst.instruction); + } + break; + + case 3: + /* Cases 5, 7. 
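Cases 5 and 7 are distinguished only by which operand carries the regisimm flag; one hand trace of case 5 (not taken from the patch itself):

    "vmov d5, r2, r3"  (two ARM registers into one D register):
      operand 0 is the D register, so the case-5 branch runs and the word is
        <cond> | 0x0c400b10
               | LOW4 (5)        -> Vm, bits [3:0]
               | HI1 (5) << 5    -> M
               | 2 << 12         -> first ARM register, bits [15:12]
               | 3 << 16         -> second ARM register, bits [19:16]
      in Thumb mode the <cond> part is the fixed 0xe0000000 prefix instead.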
*/ + if (inst.operands[0].regisimm) + { + /* Case 5. */ + inst.instruction = save_cond; + inst.instruction |= 0xc400b10; + inst.instruction |= LOW4 (inst.operands[0].reg); + inst.instruction |= HI1 (inst.operands[0].reg) << 5; + inst.instruction |= inst.operands[1].reg << 12; + inst.instruction |= inst.operands[2].reg << 16; + } + else + { + /* Case 7. */ + inst.instruction = save_cond; + inst.instruction |= 0xc500b10; + inst.instruction |= inst.operands[0].reg << 12; + inst.instruction |= inst.operands[1].reg << 16; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + } + break; + + default: + abort (); + } +} + +static void +do_neon_rshift_round_imm (void) +{ + enum neon_shape rs = neon_check_shape (NS_DDI_QQI); + struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY); + int imm = inst.operands[2].imm; + + /* imm == 0 case is encoded as VMOV for V{R}SHR. */ + if (imm == 0) + { + inst.operands[2].present = 0; + do_neon_mov (); + return; + } + + constraint (imm < 1 || (unsigned)imm > et.size, + _("immediate out of range for shift")); + neon_imm_shift (TRUE, et.type == NT_unsigned, rs == NS_QQI, et, + et.size - imm); +} + +static void +do_neon_movl (void) +{ + struct neon_type_el et = neon_check_type (2, NS_QD, + N_EQK | N_DBL, N_SU_32 | N_KEY); + unsigned sizebits = et.size >> 3; + inst.instruction |= sizebits << 19; + neon_two_same (0, et.type == NT_unsigned, -1); +} + +static void +do_neon_trn (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_8 | N_16 | N_32 | N_KEY); + inst.instruction = NEON_ENC_INTEGER (inst.instruction); + neon_two_same (rs == NS_QQ, 1, et.size); +} + +static void +do_neon_zip_uzp (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_8 | N_16 | N_32 | N_KEY); + if (rs == NS_DD && et.size == 32) + { + /* Special case: encode as VTRN.32
, . */ + inst.instruction = N_MNEM_vtrn; + do_neon_trn (); + return; + } + neon_two_same (rs == NS_QQ, 1, et.size); +} + +static void +do_neon_sat_abs_neg (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); + neon_two_same (rs == NS_QQ, 1, et.size); +} + +static void +do_neon_pair_long (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_32 | N_KEY); + /* Unsigned is encoded in OP field (bit 7) for these instruction. */ + inst.instruction |= (et.type == NT_unsigned) << 7; + neon_two_same (rs == NS_QQ, 1, et.size); +} + +static void +do_neon_recip_est (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK | N_FLT, N_F32 | N_U32 | N_KEY); + inst.instruction |= (et.type == NT_float) << 8; + neon_two_same (rs == NS_QQ, 1, et.size); +} + +static void +do_neon_cls (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_S8 | N_S16 | N_S32 | N_KEY); + neon_two_same (rs == NS_QQ, 1, et.size); +} + +static void +do_neon_clz (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK, N_I8 | N_I16 | N_I32 | N_KEY); + neon_two_same (rs == NS_QQ, 1, et.size); +} + +static void +do_neon_cnt (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + struct neon_type_el et = neon_check_type (2, rs, + N_EQK | N_INT, N_8 | N_KEY); + neon_two_same (rs == NS_QQ, 1, et.size); +} + +static void +do_neon_swp (void) +{ + enum neon_shape rs = neon_check_shape (NS_DD_QQ); + neon_two_same (rs == NS_QQ, 1, -1); +} + +static void +do_neon_tbl_tbx (void) +{ + unsigned listlenbits; + neon_check_type (1, NS_DLD, N_8); + + if (inst.operands[1].imm < 1 || inst.operands[1].imm > 4) + { + inst.error = _("bad list length for table lookup"); + return; + } + + listlenbits = inst.operands[1].imm - 1; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg) << 16; + inst.instruction |= HI1 (inst.operands[1].reg) << 7; + inst.instruction |= LOW4 (inst.operands[2].reg); + inst.instruction |= HI1 (inst.operands[2].reg) << 5; + inst.instruction |= listlenbits << 8; + + inst.instruction = neon_dp_fixup (inst.instruction); +} + +static void +do_neon_ldm_stm (void) +{ + /* P, U and L bits are part of bitmask. */ + int is_dbmode = (inst.instruction & (1 << 24)) != 0; + unsigned offsetbits = inst.operands[1].imm * 2; + + constraint (is_dbmode && !inst.operands[0].writeback, + _("writeback (!) 
must be used for VLDMDB and VSTMDB")); + + constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16, + _("register list must contain at least 1 and at most 16 " + "registers")); + + inst.instruction |= inst.operands[0].reg << 16; + inst.instruction |= inst.operands[0].writeback << 21; + inst.instruction |= LOW4 (inst.operands[1].reg) << 12; + inst.instruction |= HI1 (inst.operands[1].reg) << 22; + + inst.instruction |= offsetbits; + + if (thumb_mode) + inst.instruction |= 0xe0000000; +} + +static void +do_neon_ldr_str (void) +{ + unsigned offsetbits; + int offset_up = 1; + int is_ldr = (inst.instruction & (1 << 20)) != 0; + + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + + constraint (inst.reloc.pc_rel && !is_ldr, + _("PC-relative addressing unavailable with VSTR")); + + constraint (!inst.reloc.pc_rel && inst.reloc.exp.X_op != O_constant, + _("Immediate value must be a constant")); + + if (inst.reloc.exp.X_add_number < 0) + { + offset_up = 0; + offsetbits = -inst.reloc.exp.X_add_number / 4; + } + else + offsetbits = inst.reloc.exp.X_add_number / 4; + + /* FIXME: Does this catch everything? */ + constraint (!inst.operands[1].isreg || !inst.operands[1].preind + || inst.operands[1].postind || inst.operands[1].writeback + || inst.operands[1].immisreg || inst.operands[1].shifted, + BAD_ADDR_MODE); + constraint ((inst.operands[1].imm & 3) != 0, + _("Offset must be a multiple of 4")); + constraint (offsetbits != (offsetbits & 0xff), + _("Immediate offset out of range")); + + inst.instruction |= inst.operands[1].reg << 16; + inst.instruction |= offsetbits & 0xff; + inst.instruction |= offset_up << 23; + + if (thumb_mode) + inst.instruction |= 0xe0000000; + + if (inst.reloc.pc_rel) + { + if (thumb_mode) + inst.reloc.type = BFD_RELOC_ARM_T32_CP_OFF_IMM; + else + inst.reloc.type = BFD_RELOC_ARM_CP_OFF_IMM; + } + else + inst.reloc.type = BFD_RELOC_UNUSED; +} + +/* "interleave" version also handles non-interleaving register VLD1/VST1 + instructions. */ + +static void +do_neon_ld_st_interleave (void) +{ + struct neon_type_el et = neon_check_type (1, NS_IGNORE, + N_8 | N_16 | N_32 | N_64); + unsigned alignbits = 0; + unsigned idx; + /* The bits in this table go: + 0: register stride of one (0) or two (1) + 1,2: register list length, minus one (1, 2, 3, 4). + 3,4: in instruction type, minus one (VLD / VST). + We use -1 for invalid entries. */ + const int typetable[] = + { + 0x7, -1, 0xa, -1, 0x6, -1, 0x2, -1, /* VLD1 / VST1. */ + -1, -1, 0x8, 0x9, -1, -1, 0x3, -1, /* VLD2 / VST2. */ + -1, -1, -1, -1, 0x4, 0x5, -1, -1, /* VLD3 / VST3. */ + -1, -1, -1, -1, -1, -1, 0x0, 0x1 /* VLD4 / VST4. */ + }; + int typebits; + + if (inst.operands[1].immisalign) + switch (inst.operands[1].imm >> 8) + { + case 64: alignbits = 1; break; + case 128: + if (NEON_REGLIST_LENGTH (inst.operands[0].imm) == 3) + goto bad_alignment; + alignbits = 2; + break; + case 256: + if (NEON_REGLIST_LENGTH (inst.operands[0].imm) == 3) + goto bad_alignment; + alignbits = 3; + break; + default: + bad_alignment: + inst.error = _("bad alignment"); + return; + } + + inst.instruction |= alignbits << 4; + inst.instruction |= neon_logbits (et.size) << 6; + + /* Bits [4:6] of the immediate in a list specifier encode register stride + (minus 1) in bit 4, and list length in bits [5:6]. We put the of + VLD/VST in bits [9:8] of the initial bitmask. Suck it out here, look + up the right value for "type" in a table based on this value and the given + list style, then stick it back. 
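A worked lookup makes the table above easier to audit; the list-field layout is the one described in the preceding comment, and the starting opcode bits come from NEON_ENC_TAB.

    "vld2.16 {d0,d1}, [r0]":
      {d0,d1} is stride 1, length 2, so bits [6:4] of operands[0].imm are
      0b010 and (imm >> 4) & 7 == 2;
      vld2's interleave pattern (0x0200100) has 1 in bits [9:8], so
      idx = 2 | (1 << 3) = 10 and typetable[10] == 0x8;
      0x8 is then written back into bits [11:8] as the "type" field.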
*/ + idx = ((inst.operands[0].imm >> 4) & 7) + | (((inst.instruction >> 8) & 3) << 3); + + typebits = typetable[idx]; + + constraint (typebits == -1, _("bad list type for instruction")); + + inst.instruction &= ~0xf00; + inst.instruction |= typebits << 8; +} + +/* Check alignment is valid for do_neon_ld_st_lane and do_neon_ld_dup. + *DO_ALIGN is set to 1 if the relevant alignment bit should be set, 0 + otherwise. The variable arguments are a list of pairs of legal (size, align) + values, terminated with -1. */ + +static int +neon_alignment_bit (int size, int align, int *do_align, ...) +{ + va_list ap; + int result = FAIL, thissize, thisalign; + + if (!inst.operands[1].immisalign) + { + *do_align = 0; + return SUCCESS; + } + + va_start (ap, do_align); + + do + { + thissize = va_arg (ap, int); + if (thissize == -1) + break; + thisalign = va_arg (ap, int); + + if (size == thissize && align == thisalign) + result = SUCCESS; + } + while (result != SUCCESS); + + va_end (ap); + + if (result == SUCCESS) + *do_align = 1; + else + inst.error = _("unsupported alignment for instruction"); + + return result; +} + +static void +do_neon_ld_st_lane (void) +{ + struct neon_type_el et = neon_check_type (1, NS_IGNORE, N_8 | N_16 | N_32); + int align_good, do_align = 0; + int logsize = neon_logbits (et.size); + int align = inst.operands[1].imm >> 8; + int n = (inst.instruction >> 8) & 3; + int max_el = 64 / et.size; + + constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != n + 1, + _("bad list length")); + constraint (NEON_LANE (inst.operands[0].imm) >= max_el, + _("scalar index out of range")); + constraint (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2 + && et.size == 8, + _("stride of 2 unavailable when element size is 8")); + + switch (n) + { + case 0: /* VLD1 / VST1. */ + align_good = neon_alignment_bit (et.size, align, &do_align, 16, 16, + 32, 32, -1); + if (align_good == FAIL) + return; + if (do_align) + { + unsigned alignbits = 0; + switch (et.size) + { + case 16: alignbits = 0x1; break; + case 32: alignbits = 0x3; break; + default: ; + } + inst.instruction |= alignbits << 4; + } + break; + + case 1: /* VLD2 / VST2. */ + align_good = neon_alignment_bit (et.size, align, &do_align, 8, 16, 16, 32, + 32, 64, -1); + if (align_good == FAIL) + return; + if (do_align) + inst.instruction |= 1 << 4; + break; + + case 2: /* VLD3 / VST3. */ + constraint (inst.operands[1].immisalign, + _("can't use alignment with this instruction")); + break; + + case 3: /* VLD4 / VST4. */ + align_good = neon_alignment_bit (et.size, align, &do_align, 8, 32, + 16, 64, 32, 64, 32, 128, -1); + if (align_good == FAIL) + return; + if (do_align) + { + unsigned alignbits = 0; + switch (et.size) + { + case 8: alignbits = 0x1; break; + case 16: alignbits = 0x1; break; + case 32: alignbits = (align == 64) ? 0x1 : 0x2; break; + default: ; + } + inst.instruction |= alignbits << 4; + } + break; + + default: ; + } + + /* Reg stride of 2 is encoded in bit 5 when size==16, bit 6 when size==32. */ + if (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2) + inst.instruction |= 1 << (4 + logsize); + + inst.instruction |= NEON_LANE (inst.operands[0].imm) << (logsize + 5); + inst.instruction |= logsize << 10; +} + +/* Encode single n-element structure to all lanes VLD instructions. */ + +static void +do_neon_ld_dup (void) +{ + struct neon_type_el et = neon_check_type (1, NS_IGNORE, N_8 | N_16 | N_32); + int align_good, do_align = 0; + + switch ((inst.instruction >> 8) & 3) + { + case 0: /* VLD1. 
 */ + assert (NEON_REG_STRIDE (inst.operands[0].imm) != 2); + align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8, + &do_align, 16, 16, 32, 32, -1); + if (align_good == FAIL) + return; + switch (NEON_REGLIST_LENGTH (inst.operands[0].imm)) + { + case 1: break; + case 2: inst.instruction |= 1 << 5; break; + default: inst.error = _("bad list length"); return; + } + inst.instruction |= neon_logbits (et.size) << 6; + break; + + case 1: /* VLD2. */ + align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8, + &do_align, 8, 16, 16, 32, 32, 64, -1); + if (align_good == FAIL) + return; + constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 2, + _("bad list length")); + if (NEON_REG_STRIDE (inst.operands[0].imm) == 2) + inst.instruction |= 1 << 5; + inst.instruction |= neon_logbits (et.size) << 6; + break; + + case 2: /* VLD3. */ + constraint (inst.operands[1].immisalign, + _("can't use alignment with this instruction")); + constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 3, + _("bad list length")); + if (NEON_REG_STRIDE (inst.operands[0].imm) == 2) + inst.instruction |= 1 << 5; + inst.instruction |= neon_logbits (et.size) << 6; + break; + + case 3: /* VLD4. */ + { + int align = inst.operands[1].imm >> 8; + align_good = neon_alignment_bit (et.size, align, &do_align, 8, 32, + 16, 64, 32, 64, 32, 128, -1); + if (align_good == FAIL) + return; + constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4, + _("bad list length")); + if (NEON_REG_STRIDE (inst.operands[0].imm) == 2) + inst.instruction |= 1 << 5; + if (et.size == 32 && align == 128) + inst.instruction |= 0x3 << 6; + else + inst.instruction |= neon_logbits (et.size) << 6; + } + break; + + default: ; + } + + inst.instruction |= do_align << 4; +} + +/* Disambiguate VLD<n> and VST<n> instructions, and fill in common bits (those + apart from bits [11:4]). */ + +static void +do_neon_ldx_stx (void) +{ + switch (NEON_LANE (inst.operands[0].imm)) + { + case NEON_INTERLEAVE_LANES: + inst.instruction = NEON_ENC_INTERLV (inst.instruction); + do_neon_ld_st_interleave (); + break; + + case NEON_ALL_LANES: + inst.instruction = NEON_ENC_DUP (inst.instruction); + do_neon_ld_dup (); + break; + + default: + inst.instruction = NEON_ENC_LANE (inst.instruction); + do_neon_ld_st_lane (); + } + + /* L bit comes from bit mask. */ + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= inst.operands[1].reg << 16; + + if (inst.operands[1].postind) + { + int postreg = inst.operands[1].imm & 0xf; + constraint (!inst.operands[1].immisreg, + _("post-index must be a register")); + constraint (postreg == 0xd || postreg == 0xf, + _("bad register for post-index")); + inst.instruction |= postreg; + } + else if (inst.operands[1].writeback) + { + inst.instruction |= 0xd; + } + else + inst.instruction |= 0xf; + + if (thumb_mode) + inst.instruction |= 0xf9000000; + else + inst.instruction |= 0xf4000000; +} + /* Overall per-instruction processing. */ @@ -8123,6 +11488,85 @@ output_inst (const char * str) #endif } +/* Parse a Neon type specifier. *STR should point at the leading '.' + character. Does no verification at this stage that the type fits the opcode + properly. E.g., + + .i32.i32.s16 + .s32.f32 + .u16 + + Can all be legally parsed by this function. + + Fills in neon_type struct pointer with parsed information, and updates STR + to point after the parsed type specifier. Returns TRUE if this was a legal + type, FALSE if not.
*/ + +static bfd_boolean +parse_neon_type (struct neon_type *type, char **str) +{ + char *ptr = *str; + + if (type) + type->elems = 0; + + while (type->elems < NEON_MAX_TYPE_ELS) + { + enum neon_el_type thistype = NT_untyped; + unsigned thissize = -1u; + + if (*ptr != '.') + break; + + ptr++; + + /* Just a size without an explicit type. */ + if (ISDIGIT (*ptr)) + goto parsesize; + + switch (*ptr) + { + case 'i': thistype = NT_integer; break; + case 'f': thistype = NT_float; break; + case 'p': thistype = NT_poly; break; + case 's': thistype = NT_signed; break; + case 'u': thistype = NT_unsigned; break; + default: + as_bad (_("Unexpected character `%c' in type specifier"), *ptr); + return 0; + } + + ptr++; + + /* .f is an abbreviation for .f32. */ + if (thistype == NT_float && !ISDIGIT (*ptr)) + thissize = 32; + else + { + parsesize: + thissize = strtoul (ptr, &ptr, 10); + + if (thissize != 8 && thissize != 16 && thissize != 32 + && thissize != 64) + { + as_bad (_("Bad size %d in type specifier"), thissize); + return 0; + } + } + + if (type) + { + type->el[type->elems].type = thistype; + type->el[type->elems].size = thissize; + type->elems++; + } + } + + *str = ptr; + + return 1; +} + /* Tag values used in struct asm_opcode's tag field. */ enum opcode_tag { @@ -8222,17 +11666,30 @@ opcode_lookup (char **str) if (end == base) return 0; - /* Handle a possible width suffix. */ + /* Handle a possible width suffix and/or Neon type suffix. */ if (end[0] == '.') { - if (end[1] == 'w' && (end[2] == ' ' || end[2] == '\0')) + int offset = 2; + + if (end[1] == 'w') inst.size_req = 4; - else if (end[1] == 'n' && (end[2] == ' ' || end[2] == '\0')) + else if (end[1] == 'n') inst.size_req = 2; else - return 0; + offset = 0; - *str = end + 2; + inst.vectype.elems = 0; + + *str = end + offset; + + if (end[offset] == '.') + { + /* See if we have a Neon type suffix. */ + if (!parse_neon_type (&inst.vectype, str)) + return 0; + } + else if (end[offset] != '\0' && end[offset] != ' ') + return 0; } else *str = end; @@ -8576,11 +12033,22 @@ arm_canonicalize_symbol_name (char * name) #define REGDEF(s,n,t) { #s, n, REG_TYPE_##t, TRUE } #define REGNUM(p,n,t) REGDEF(p##n, n, t) +#define REGNUM2(p,n,t) REGDEF(p##n, 2 * n, t) #define REGSET(p,t) \ REGNUM(p, 0,t), REGNUM(p, 1,t), REGNUM(p, 2,t), REGNUM(p, 3,t), \ REGNUM(p, 4,t), REGNUM(p, 5,t), REGNUM(p, 6,t), REGNUM(p, 7,t), \ REGNUM(p, 8,t), REGNUM(p, 9,t), REGNUM(p,10,t), REGNUM(p,11,t), \ REGNUM(p,12,t), REGNUM(p,13,t), REGNUM(p,14,t), REGNUM(p,15,t) +#define REGSETH(p,t) \ + REGNUM(p,16,t), REGNUM(p,17,t), REGNUM(p,18,t), REGNUM(p,19,t), \ + REGNUM(p,20,t), REGNUM(p,21,t), REGNUM(p,22,t), REGNUM(p,23,t), \ + REGNUM(p,24,t), REGNUM(p,25,t), REGNUM(p,26,t), REGNUM(p,27,t), \ + REGNUM(p,28,t), REGNUM(p,29,t), REGNUM(p,30,t), REGNUM(p,31,t) +#define REGSET2(p,t) \ + REGNUM2(p, 0,t), REGNUM2(p, 1,t), REGNUM2(p, 2,t), REGNUM2(p, 3,t), \ + REGNUM2(p, 4,t), REGNUM2(p, 5,t), REGNUM2(p, 6,t), REGNUM2(p, 7,t), \ + REGNUM2(p, 8,t), REGNUM2(p, 9,t), REGNUM2(p,10,t), REGNUM2(p,11,t), \ + REGNUM2(p,12,t), REGNUM2(p,13,t), REGNUM2(p,14,t), REGNUM2(p,15,t) static const struct reg_entry reg_names[] = { @@ -8619,20 +12087,16 @@ static const struct reg_entry reg_names[] = REGNUM(F,4,FN), REGNUM(F,5,FN), REGNUM(F,6,FN), REGNUM(F,7, FN), /* VFP SP registers. 
*/ - REGSET(s,VFS), - REGNUM(s,16,VFS), REGNUM(s,17,VFS), REGNUM(s,18,VFS), REGNUM(s,19,VFS), - REGNUM(s,20,VFS), REGNUM(s,21,VFS), REGNUM(s,22,VFS), REGNUM(s,23,VFS), - REGNUM(s,24,VFS), REGNUM(s,25,VFS), REGNUM(s,26,VFS), REGNUM(s,27,VFS), - REGNUM(s,28,VFS), REGNUM(s,29,VFS), REGNUM(s,30,VFS), REGNUM(s,31,VFS), - - REGSET(S,VFS), - REGNUM(S,16,VFS), REGNUM(S,17,VFS), REGNUM(S,18,VFS), REGNUM(S,19,VFS), - REGNUM(S,20,VFS), REGNUM(S,21,VFS), REGNUM(S,22,VFS), REGNUM(S,23,VFS), - REGNUM(S,24,VFS), REGNUM(S,25,VFS), REGNUM(S,26,VFS), REGNUM(S,27,VFS), - REGNUM(S,28,VFS), REGNUM(S,29,VFS), REGNUM(S,30,VFS), REGNUM(S,31,VFS), + REGSET(s,VFS), REGSET(S,VFS), + REGSETH(s,VFS), REGSETH(S,VFS), /* VFP DP Registers. */ - REGSET(d,VFD), REGSET(D,VFS), + REGSET(d,VFD), REGSET(D,VFD), + /* Extra Neon DP registers. */ + REGSETH(d,VFD), REGSETH(D,VFD), + + /* Neon QP registers. */ + REGSET2(q,NQ), REGSET2(Q,NQ), /* VFP control registers. */ REGDEF(fpsid,0,VFC), REGDEF(fpscr,1,VFC), REGDEF(fpexc,8,VFC), @@ -8971,6 +12435,30 @@ static struct asm_barrier_opt barrier_opt_names[] = #define UF(mnem, op, nops, ops, ae) \ { #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0, ARM_VARIANT, 0, do_##ae, NULL } +/* Neon data-processing. ARM versions are unconditional with cond=0xf. + The Thumb and ARM variants are mostly the same (bits 0-23 and 24/28), so we + use the same encoding function for each. */ +#define NUF(mnem, op, nops, ops, enc) \ + { #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0x##op, \ + ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc } + +/* Neon data processing, version which indirects through neon_enc_tab for + the various overloaded versions of opcodes. */ +#define nUF(mnem, op, nops, ops, enc) \ + { #mnem, OPS##nops ops, OT_unconditionalF, N_MNEM_##op, N_MNEM_##op, \ + ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc } + +/* Neon insn with conditional suffix for the ARM version, non-overloaded + version. */ +#define NCE(mnem, op, nops, ops, enc) \ + { #mnem, OPS##nops ops, OT_csuffix, 0x##op, 0x##op, ARM_VARIANT, \ + THUMB_VARIANT, do_##enc, do_##enc } + +/* Neon insn with conditional suffix for the ARM version, overloaded types. */ +#define nCE(mnem, op, nops, ops, enc) \ + { #mnem, OPS##nops ops, OT_csuffix, N_MNEM_##op, N_MNEM_##op, \ + ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc } + #define do_0 0 /* Thumb-only, unconditional. */ @@ -9911,13 +13399,13 @@ static const struct asm_opcode insns[] = #undef ARM_VARIANT #define ARM_VARIANT &fpu_vfp_ext_v1 /* VFP V1 (Double precision). */ /* Moves and type conversions. */ - cCE(fcpyd, eb00b40, 2, (RVD, RVD), rd_rm), + cCE(fcpyd, eb00b40, 2, (RVD, RVD), vfp_dp_rd_rm), cCE(fcvtds, eb70ac0, 2, (RVD, RVS), vfp_dp_sp_cvt), cCE(fcvtsd, eb70bc0, 2, (RVS, RVD), vfp_sp_dp_cvt), - cCE(fmdhr, e200b10, 2, (RVD, RR), rn_rd), - cCE(fmdlr, e000b10, 2, (RVD, RR), rn_rd), - cCE(fmrdh, e300b10, 2, (RR, RVD), rd_rn), - cCE(fmrdl, e100b10, 2, (RR, RVD), rd_rn), + cCE(fmdhr, e200b10, 2, (RVD, RR), vfp_dp_rn_rd), + cCE(fmdlr, e000b10, 2, (RVD, RR), vfp_dp_rn_rd), + cCE(fmrdh, e300b10, 2, (RR, RVD), vfp_dp_rd_rn), + cCE(fmrdl, e100b10, 2, (RR, RVD), vfp_dp_rd_rn), cCE(fsitod, eb80bc0, 2, (RVD, RVS), vfp_dp_sp_cvt), cCE(fuitod, eb80b40, 2, (RVD, RVS), vfp_dp_sp_cvt), cCE(ftosid, ebd0b40, 2, (RVS, RVD), vfp_sp_dp_cvt), @@ -9938,34 +13426,327 @@ static const struct asm_opcode insns[] = cCE(fstmfdd, d200b00, 2, (RRw, VRDLST), vfp_dp_ldstmdb), /* Monadic operations. 
*/ - cCE(fabsd, eb00bc0, 2, (RVD, RVD), rd_rm), - cCE(fnegd, eb10b40, 2, (RVD, RVD), rd_rm), - cCE(fsqrtd, eb10bc0, 2, (RVD, RVD), rd_rm), + cCE(fabsd, eb00bc0, 2, (RVD, RVD), vfp_dp_rd_rm), + cCE(fnegd, eb10b40, 2, (RVD, RVD), vfp_dp_rd_rm), + cCE(fsqrtd, eb10bc0, 2, (RVD, RVD), vfp_dp_rd_rm), /* Dyadic operations. */ - cCE(faddd, e300b00, 3, (RVD, RVD, RVD), rd_rn_rm), - cCE(fsubd, e300b40, 3, (RVD, RVD, RVD), rd_rn_rm), - cCE(fmuld, e200b00, 3, (RVD, RVD, RVD), rd_rn_rm), - cCE(fdivd, e800b00, 3, (RVD, RVD, RVD), rd_rn_rm), - cCE(fmacd, e000b00, 3, (RVD, RVD, RVD), rd_rn_rm), - cCE(fmscd, e100b00, 3, (RVD, RVD, RVD), rd_rn_rm), - cCE(fnmuld, e200b40, 3, (RVD, RVD, RVD), rd_rn_rm), - cCE(fnmacd, e000b40, 3, (RVD, RVD, RVD), rd_rn_rm), - cCE(fnmscd, e100b40, 3, (RVD, RVD, RVD), rd_rn_rm), + cCE(faddd, e300b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm), + cCE(fsubd, e300b40, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm), + cCE(fmuld, e200b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm), + cCE(fdivd, e800b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm), + cCE(fmacd, e000b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm), + cCE(fmscd, e100b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm), + cCE(fnmuld, e200b40, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm), + cCE(fnmacd, e000b40, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm), + cCE(fnmscd, e100b40, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm), /* Comparisons. */ - cCE(fcmpd, eb40b40, 2, (RVD, RVD), rd_rm), - cCE(fcmpzd, eb50b40, 1, (RVD), rd), - cCE(fcmped, eb40bc0, 2, (RVD, RVD), rd_rm), - cCE(fcmpezd, eb50bc0, 1, (RVD), rd), + cCE(fcmpd, eb40b40, 2, (RVD, RVD), vfp_dp_rd_rm), + cCE(fcmpzd, eb50b40, 1, (RVD), vfp_dp_rd), + cCE(fcmped, eb40bc0, 2, (RVD, RVD), vfp_dp_rd_rm), + cCE(fcmpezd, eb50bc0, 1, (RVD), vfp_dp_rd), #undef ARM_VARIANT #define ARM_VARIANT &fpu_vfp_ext_v2 cCE(fmsrr, c400a10, 3, (VRSLST, RR, RR), vfp_sp2_from_reg2), cCE(fmrrs, c500a10, 3, (RR, RR, VRSLST), vfp_reg2_from_sp2), - cCE(fmdrr, c400b10, 3, (RVD, RR, RR), rm_rd_rn), - cCE(fmrrd, c500b10, 3, (RR, RR, RVD), rd_rn_rm), + cCE(fmdrr, c400b10, 3, (RVD, RR, RR), vfp_dp_rm_rd_rn), + cCE(fmrrd, c500b10, 3, (RR, RR, RVD), vfp_dp_rd_rn_rm), +#undef THUMB_VARIANT +#define THUMB_VARIANT &fpu_neon_ext_v1 +#undef ARM_VARIANT +#define ARM_VARIANT &fpu_neon_ext_v1 + /* Data processing with three registers of the same length. */ + /* integer ops, valid types S8 S16 S32 U8 U16 U32. */ + NUF(vaba, 0000710, 3, (RNDQ, RNDQ, RNDQ), neon_dyadic_i_su), + NUF(vabaq, 0000710, 3, (RNQ, RNQ, RNQ), neon_dyadic_i_su), + NUF(vhadd, 0000000, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su), + NUF(vhaddq, 0000000, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su), + NUF(vrhadd, 0000100, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su), + NUF(vrhaddq, 0000100, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su), + NUF(vhsub, 0000200, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su), + NUF(vhsubq, 0000200, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su), + /* integer ops, valid types S8 S16 S32 S64 U8 U16 U32 U64. */ + NUF(vqadd, 0000010, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su), + NUF(vqaddq, 0000010, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su), + NUF(vqsub, 0000210, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su), + NUF(vqsubq, 0000210, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su), + NUF(vrshl, 0000500, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su), + NUF(vrshlq, 0000500, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su), + NUF(vqrshl, 0000510, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su), + NUF(vqrshlq, 0000510, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su), + /* If not immediate, fall back to neon_dyadic_i64_su. 
+ shl_imm should accept I8 I16 I32 I64, + qshl_imm should accept S8 S16 S32 S64 U8 U16 U32 U64. */ + nUF(vshl, vshl, 3, (RNDQ, oRNDQ, RNDQ_I63b), neon_shl_imm), + nUF(vshlq, vshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_shl_imm), + nUF(vqshl, vqshl, 3, (RNDQ, oRNDQ, RNDQ_I63b), neon_qshl_imm), + nUF(vqshlq, vqshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_qshl_imm), + /* Logic ops, types optional & ignored. */ + nUF(vand, vand, 2, (RNDQ, NILO), neon_logic), + nUF(vandq, vand, 2, (RNQ, NILO), neon_logic), + nUF(vbic, vbic, 2, (RNDQ, NILO), neon_logic), + nUF(vbicq, vbic, 2, (RNQ, NILO), neon_logic), + nUF(vorr, vorr, 2, (RNDQ, NILO), neon_logic), + nUF(vorrq, vorr, 2, (RNQ, NILO), neon_logic), + nUF(vorn, vorn, 2, (RNDQ, NILO), neon_logic), + nUF(vornq, vorn, 2, (RNQ, NILO), neon_logic), + nUF(veor, veor, 3, (RNDQ, oRNDQ, RNDQ), neon_logic), + nUF(veorq, veor, 3, (RNQ, oRNQ, RNQ), neon_logic), + /* Bitfield ops, untyped. */ + NUF(vbsl, 1100110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield), + NUF(vbslq, 1100110, 3, (RNQ, RNQ, RNQ), neon_bitfield), + NUF(vbit, 1200110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield), + NUF(vbitq, 1200110, 3, (RNQ, RNQ, RNQ), neon_bitfield), + NUF(vbif, 1300110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield), + NUF(vbifq, 1300110, 3, (RNQ, RNQ, RNQ), neon_bitfield), + /* Int and float variants, types S8 S16 S32 U8 U16 U32 F32. */ + nUF(vabd, vabd, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su), + nUF(vabdq, vabd, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su), + nUF(vmax, vmax, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su), + nUF(vmaxq, vmax, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su), + nUF(vmin, vmin, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su), + nUF(vminq, vmin, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su), + /* Comparisons. Types S8 S16 S32 U8 U16 U32 F32. Non-immediate versions fall + back to neon_dyadic_if_su. */ + nUF(vcge, vcge, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp), + nUF(vcgeq, vcge, 3, (RNQ, oRNQ, RNDQ_I0), neon_cmp), + nUF(vcgt, vcgt, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp), + nUF(vcgtq, vcgt, 3, (RNQ, oRNQ, RNDQ_I0), neon_cmp), + nUF(vclt, vclt, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp_inv), + nUF(vcltq, vclt, 3, (RNQ, oRNQ, RNDQ_I0), neon_cmp_inv), + nUF(vcle, vcle, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp_inv), + nUF(vcleq, vcle, 3, (RNQ, oRNQ, RNDQ_I0), neon_cmp_inv), + /* Comparison. Type I8 I16 I32 F32. Non-immediate -> neon_dyadic_if_i. */ + nUF(vceq, vceq, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_ceq), + nUF(vceqq, vceq, 3, (RNQ, oRNQ, RNDQ_I0), neon_ceq), + /* As above, D registers only. */ + nUF(vpmax, vpmax, 3, (RND, oRND, RND), neon_dyadic_if_su_d), + nUF(vpmin, vpmin, 3, (RND, oRND, RND), neon_dyadic_if_su_d), + /* Int and float variants, signedness unimportant. */ + /* If not scalar, fall back to neon_dyadic_if_i. */ + nUF(vmla, vmla, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_mac_maybe_scalar), + nUF(vmlaq, vmla, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_mac_maybe_scalar), + nUF(vmls, vmls, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_mac_maybe_scalar), + nUF(vmlsq, vmls, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_mac_maybe_scalar), + nUF(vpadd, vpadd, 3, (RND, oRND, RND), neon_dyadic_if_i_d), + /* Add/sub take types I8 I16 I32 I64 F32. */ + nUF(vadd, vadd, 3, (RNDQ, oRNDQ, RNDQ), neon_addsub_if_i), + nUF(vaddq, vadd, 3, (RNQ, oRNQ, RNQ), neon_addsub_if_i), + nUF(vsub, vsub, 3, (RNDQ, oRNDQ, RNDQ), neon_addsub_if_i), + nUF(vsubq, vsub, 3, (RNQ, oRNQ, RNQ), neon_addsub_if_i), + /* vtst takes sizes 8, 16, 32. */ + NUF(vtst, 0000810, 3, (RNDQ, oRNDQ, RNDQ), neon_tst), + NUF(vtstq, 0000810, 3, (RNQ, oRNQ, RNQ), neon_tst), + /* VMUL takes I8 I16 I32 F32 P8. 
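+ (E.g. "vmul.p8 d0, d1, d2" selects the polynomial variant; illustrative
+ example only.)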
*/ + nUF(vmul, vmul, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_mul), + nUF(vmulq, vmul, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_mul), + /* VQD{R}MULH takes S16 S32. */ + nUF(vqdmulh, vqdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh), + nUF(vqdmulhq, vqdmulh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh), + nUF(vqrdmulh, vqrdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh), + nUF(vqrdmulhq, vqrdmulh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh), + NUF(vacge, 0000e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute), + NUF(vacgeq, 0000e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute), + NUF(vacgt, 0200e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute), + NUF(vacgtq, 0200e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute), + NUF(vaclt, 0000e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute_inv), + NUF(vacltq, 0000e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute_inv), + NUF(vacle, 0200e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute_inv), + NUF(vacleq, 0200e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute_inv), + NUF(vrecps, 0000f10, 3, (RNDQ, oRNDQ, RNDQ), neon_step), + NUF(vrecpsq, 0000f10, 3, (RNQ, oRNQ, RNQ), neon_step), + NUF(vrsqrts, 0200f10, 3, (RNDQ, oRNDQ, RNDQ), neon_step), + NUF(vrsqrtsq, 0200f10, 3, (RNQ, oRNQ, RNQ), neon_step), + + /* Two address, int/float. Types S8 S16 S32 F32. */ + NUF(vabs, 1b10300, 2, (RNDQ, RNDQ), neon_abs_neg), + NUF(vabsq, 1b10300, 2, (RNQ, RNQ), neon_abs_neg), + NUF(vneg, 1b10380, 2, (RNDQ, RNDQ), neon_abs_neg), + NUF(vnegq, 1b10380, 2, (RNQ, RNQ), neon_abs_neg), + + /* Data processing with two registers and a shift amount. */ + /* Right shifts, and variants with rounding. + Types accepted S8 S16 S32 S64 U8 U16 U32 U64. */ + NUF(vshr, 0800010, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm), + NUF(vshrq, 0800010, 3, (RNQ, oRNQ, I64z), neon_rshift_round_imm), + NUF(vrshr, 0800210, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm), + NUF(vrshrq, 0800210, 3, (RNQ, oRNQ, I64z), neon_rshift_round_imm), + NUF(vsra, 0800110, 3, (RNDQ, oRNDQ, I64), neon_rshift_round_imm), + NUF(vsraq, 0800110, 3, (RNQ, oRNQ, I64), neon_rshift_round_imm), + NUF(vrsra, 0800310, 3, (RNDQ, oRNDQ, I64), neon_rshift_round_imm), + NUF(vrsraq, 0800310, 3, (RNQ, oRNQ, I64), neon_rshift_round_imm), + /* Shift and insert. Sizes accepted 8 16 32 64. */ + NUF(vsli, 1800510, 3, (RNDQ, oRNDQ, I63), neon_sli), + NUF(vsliq, 1800510, 3, (RNQ, oRNQ, I63), neon_sli), + NUF(vsri, 1800410, 3, (RNDQ, oRNDQ, I64), neon_sri), + NUF(vsriq, 1800410, 3, (RNQ, oRNQ, I64), neon_sri), + /* QSHL{U} immediate accepts S8 S16 S32 S64 U8 U16 U32 U64. */ + NUF(vqshlu, 1800610, 3, (RNDQ, oRNDQ, I63), neon_qshlu_imm), + NUF(vqshluq, 1800610, 3, (RNQ, oRNQ, I63), neon_qshlu_imm), + /* Right shift immediate, saturating & narrowing, with rounding variants. + Types accepted S16 S32 S64 U16 U32 U64. */ + NUF(vqshrn, 0800910, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow), + NUF(vqrshrn, 0800950, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow), + /* As above, unsigned. Types accepted S16 S32 S64. */ + NUF(vqshrun, 0800810, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow_u), + NUF(vqrshrun, 0800850, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow_u), + /* Right shift narrowing. Types accepted I16 I32 I64. */ + NUF(vshrn, 0800810, 3, (RND, RNQ, I32z), neon_rshift_narrow), + NUF(vrshrn, 0800850, 3, (RND, RNQ, I32z), neon_rshift_narrow), + /* Special case. Types S8 S16 S32 U8 U16 U32. Handles max shift variant. */ + nUF(vshll, vshll, 3, (RNQ, RND, I32), neon_shll), + /* CVT with optional immediate for fixed-point variant. 
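+ (E.g. "vcvt.s32.f32 d0, d0, #16" converts to fixed-point with 16 fraction
+ bits; illustrative example only.)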
*/ + nUF(vcvt, vcvt, 3, (RNDQ, RNDQ, oI32b), neon_cvt), + nUF(vcvtq, vcvt, 3, (RNQ, RNQ, oI32b), neon_cvt), + + /* One register and an immediate value. All encoding special-cased! */ + NCE(vmov, 0, 1, (VMOV), neon_mov), + NCE(vmovq, 0, 1, (VMOV), neon_mov), + nUF(vmvn, vmvn, 2, (RNDQ, RNDQ_IMVNb), neon_mvn), + nUF(vmvnq, vmvn, 2, (RNQ, RNDQ_IMVNb), neon_mvn), + + /* Data processing, three registers of different lengths. */ + /* Dyadic, long insns. Types S8 S16 S32 U8 U16 U32. */ + NUF(vabal, 0800500, 3, (RNQ, RND, RND), neon_abal), + NUF(vabdl, 0800700, 3, (RNQ, RND, RND), neon_dyadic_long), + NUF(vaddl, 0800000, 3, (RNQ, RND, RND), neon_dyadic_long), + NUF(vsubl, 0800200, 3, (RNQ, RND, RND), neon_dyadic_long), + /* If not scalar, fall back to neon_dyadic_long. + Vector types as above, scalar types S16 S32 U16 U32. */ + nUF(vmlal, vmlal, 3, (RNQ, RND, RND_RNSC), neon_mac_maybe_scalar_long), + nUF(vmlsl, vmlsl, 3, (RNQ, RND, RND_RNSC), neon_mac_maybe_scalar_long), + /* Dyadic, widening insns. Types S8 S16 S32 U8 U16 U32. */ + NUF(vaddw, 0800100, 3, (RNQ, oRNQ, RND), neon_dyadic_wide), + NUF(vsubw, 0800300, 3, (RNQ, oRNQ, RND), neon_dyadic_wide), + /* Dyadic, narrowing insns. Types I16 I32 I64. */ + NUF(vaddhn, 0800400, 3, (RND, RNQ, RNQ), neon_dyadic_narrow), + NUF(vraddhn, 1800400, 3, (RND, RNQ, RNQ), neon_dyadic_narrow), + NUF(vsubhn, 0800600, 3, (RND, RNQ, RNQ), neon_dyadic_narrow), + NUF(vrsubhn, 1800600, 3, (RND, RNQ, RNQ), neon_dyadic_narrow), + /* Saturating doubling multiplies. Types S16 S32. */ + nUF(vqdmlal, vqdmlal, 3, (RNQ, RND, RND_RNSC), neon_mul_sat_scalar_long), + nUF(vqdmlsl, vqdmlsl, 3, (RNQ, RND, RND_RNSC), neon_mul_sat_scalar_long), + nUF(vqdmull, vqdmull, 3, (RNQ, RND, RND_RNSC), neon_mul_sat_scalar_long), + /* VMULL. Vector types S8 S16 S32 U8 U16 U32 P8, scalar types + S16 S32 U16 U32. */ + nUF(vmull, vmull, 3, (RNQ, RND, RND_RNSC), neon_vmull), + + /* Extract. Size 8. */ + NUF(vext, 0b00000, 4, (RNDQ, oRNDQ, RNDQ, I7), neon_ext), + NUF(vextq, 0b00000, 4, (RNQ, oRNQ, RNQ, I7), neon_ext), + + /* Two registers, miscellaneous. */ + /* Reverse. Sizes 8 16 32 (must be < size in opcode). */ + NUF(vrev64, 1b00000, 2, (RNDQ, RNDQ), neon_rev), + NUF(vrev64q, 1b00000, 2, (RNQ, RNQ), neon_rev), + NUF(vrev32, 1b00080, 2, (RNDQ, RNDQ), neon_rev), + NUF(vrev32q, 1b00080, 2, (RNQ, RNQ), neon_rev), + NUF(vrev16, 1b00100, 2, (RNDQ, RNDQ), neon_rev), + NUF(vrev16q, 1b00100, 2, (RNQ, RNQ), neon_rev), + /* Vector replicate. Sizes 8 16 32. */ + nCE(vdup, vdup, 2, (RNDQ, RR_RNSC), neon_dup), + nCE(vdupq, vdup, 2, (RNQ, RR_RNSC), neon_dup), + /* VMOVL. Types S8 S16 S32 U8 U16 U32. */ + NUF(vmovl, 0800a10, 2, (RNQ, RND), neon_movl), + /* VMOVN. Types I16 I32 I64. */ + nUF(vmovn, vmovn, 2, (RND, RNQ), neon_movn), + /* VQMOVN. Types S16 S32 S64 U16 U32 U64. */ + nUF(vqmovn, vqmovn, 2, (RND, RNQ), neon_qmovn), + /* VQMOVUN. Types S16 S32 S64. */ + nUF(vqmovun, vqmovun, 2, (RND, RNQ), neon_qmovun), + /* VZIP / VUZP. Sizes 8 16 32. */ + NUF(vzip, 1b20180, 2, (RNDQ, RNDQ), neon_zip_uzp), + NUF(vzipq, 1b20180, 2, (RNQ, RNQ), neon_zip_uzp), + NUF(vuzp, 1b20100, 2, (RNDQ, RNDQ), neon_zip_uzp), + NUF(vuzpq, 1b20100, 2, (RNQ, RNQ), neon_zip_uzp), + /* VQABS / VQNEG. Types S8 S16 S32. */ + NUF(vqabs, 1b00700, 2, (RNDQ, RNDQ), neon_sat_abs_neg), + NUF(vqabsq, 1b00700, 2, (RNQ, RNQ), neon_sat_abs_neg), + NUF(vqneg, 1b00780, 2, (RNDQ, RNDQ), neon_sat_abs_neg), + NUF(vqnegq, 1b00780, 2, (RNQ, RNQ), neon_sat_abs_neg), + /* Pairwise, lengthening. Types S8 S16 S32 U8 U16 U32. 
*/ + NUF(vpadal, 1b00600, 2, (RNDQ, RNDQ), neon_pair_long), + NUF(vpadalq, 1b00600, 2, (RNQ, RNQ), neon_pair_long), + NUF(vpaddl, 1b00200, 2, (RNDQ, RNDQ), neon_pair_long), + NUF(vpaddlq, 1b00200, 2, (RNQ, RNQ), neon_pair_long), + /* Reciprocal estimates. Types U32 F32. */ + NUF(vrecpe, 1b30400, 2, (RNDQ, RNDQ), neon_recip_est), + NUF(vrecpeq, 1b30400, 2, (RNQ, RNQ), neon_recip_est), + NUF(vrsqrte, 1b30480, 2, (RNDQ, RNDQ), neon_recip_est), + NUF(vrsqrteq, 1b30480, 2, (RNQ, RNQ), neon_recip_est), + /* VCLS. Types S8 S16 S32. */ + NUF(vcls, 1b00400, 2, (RNDQ, RNDQ), neon_cls), + NUF(vclsq, 1b00400, 2, (RNQ, RNQ), neon_cls), + /* VCLZ. Types I8 I16 I32. */ + NUF(vclz, 1b00480, 2, (RNDQ, RNDQ), neon_clz), + NUF(vclzq, 1b00480, 2, (RNQ, RNQ), neon_clz), + /* VCNT. Size 8. */ + NUF(vcnt, 1b00500, 2, (RNDQ, RNDQ), neon_cnt), + NUF(vcntq, 1b00500, 2, (RNQ, RNQ), neon_cnt), + /* Two address, untyped. */ + NUF(vswp, 1b20000, 2, (RNDQ, RNDQ), neon_swp), + NUF(vswpq, 1b20000, 2, (RNQ, RNQ), neon_swp), + /* VTRN. Sizes 8 16 32. */ + nUF(vtrn, vtrn, 2, (RNDQ, RNDQ), neon_trn), + nUF(vtrnq, vtrn, 2, (RNQ, RNQ), neon_trn), + + /* Table lookup. Size 8. */ + NUF(vtbl, 1b00800, 3, (RND, NRDLST, RND), neon_tbl_tbx), + NUF(vtbx, 1b00840, 3, (RND, NRDLST, RND), neon_tbl_tbx), + +#undef THUMB_VARIANT +#define THUMB_VARIANT &fpu_vfp_v3_or_neon_ext +#undef ARM_VARIANT +#define ARM_VARIANT &fpu_vfp_v3_or_neon_ext + + /* Load/store instructions. Available in Neon or VFPv3. */ + NCE(vldm, c900b00, 2, (RRw, NRDLST), neon_ldm_stm), + NCE(vldmia, c900b00, 2, (RRw, NRDLST), neon_ldm_stm), + NCE(vldmdb, d100b00, 2, (RRw, NRDLST), neon_ldm_stm), + NCE(vstm, c800b00, 2, (RRw, NRDLST), neon_ldm_stm), + NCE(vstmia, c800b00, 2, (RRw, NRDLST), neon_ldm_stm), + NCE(vstmdb, d000b00, 2, (RRw, NRDLST), neon_ldm_stm), + NCE(vldr, d100b00, 2, (RND, ADDR), neon_ldr_str), + NCE(vstr, d000b00, 2, (RND, ADDR), neon_ldr_str), + + /* Neon element/structure load/store. 
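+ (E.g. "vld1.32 {d0,d1}, [r0]!" or "vst2.8 {d0[1],d1[1]}, [r1]";
+ illustrative examples only.)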
*/ + nUF(vld1, vld1, 2, (NSTRLST, ADDR), neon_ldx_stx), + nUF(vst1, vst1, 2, (NSTRLST, ADDR), neon_ldx_stx), + nUF(vld2, vld2, 2, (NSTRLST, ADDR), neon_ldx_stx), + nUF(vst2, vst2, 2, (NSTRLST, ADDR), neon_ldx_stx), + nUF(vld3, vld3, 2, (NSTRLST, ADDR), neon_ldx_stx), + nUF(vst3, vst3, 2, (NSTRLST, ADDR), neon_ldx_stx), + nUF(vld4, vld4, 2, (NSTRLST, ADDR), neon_ldx_stx), + nUF(vst4, vst4, 2, (NSTRLST, ADDR), neon_ldx_stx), + +#undef THUMB_VARIANT +#define THUMB_VARIANT &fpu_vfp_ext_v3 +#undef ARM_VARIANT +#define ARM_VARIANT &fpu_vfp_ext_v3 + + cCE(fconsts, eb00a00, 2, (RVS, I255), vfp_sp_const), + cCE(fconstd, eb00b00, 2, (RVD, I255), vfp_dp_const), + cCE(fshtos, eba0a40, 2, (RVS, I16z), vfp_sp_conv_16), + cCE(fshtod, eba0b40, 2, (RVD, I16z), vfp_dp_conv_16), + cCE(fsltos, eba0ac0, 2, (RVS, I32), vfp_sp_conv_32), + cCE(fsltod, eba0bc0, 2, (RVD, I32), vfp_dp_conv_32), + cCE(fuhtos, ebb0a40, 2, (RVS, I16z), vfp_sp_conv_16), + cCE(fuhtod, ebb0b40, 2, (RVD, I16z), vfp_dp_conv_16), + cCE(fultos, ebb0ac0, 2, (RVS, I32), vfp_sp_conv_32), + cCE(fultod, ebb0bc0, 2, (RVD, I32), vfp_dp_conv_32), + cCE(ftoshs, ebe0a40, 2, (RVS, I16z), vfp_sp_conv_16), + cCE(ftoshd, ebe0b40, 2, (RVD, I16z), vfp_dp_conv_16), + cCE(ftosls, ebe0ac0, 2, (RVS, I32), vfp_sp_conv_32), + cCE(ftosld, ebe0bc0, 2, (RVD, I32), vfp_dp_conv_32), + cCE(ftouhs, ebf0a40, 2, (RVS, I16z), vfp_sp_conv_16), + cCE(ftouhd, ebf0b40, 2, (RVD, I16z), vfp_dp_conv_16), + cCE(ftouls, ebf0ac0, 2, (RVS, I32), vfp_sp_conv_32), + cCE(ftould, ebf0bc0, 2, (RVD, I32), vfp_dp_conv_32), + +#undef THUMB_VARIANT #undef ARM_VARIANT #define ARM_VARIANT &arm_cext_xscale /* Intel XScale extensions. */ cCE(mia, e200010, 3, (RXA, RRnpc, RRnpc), xsc_mia), @@ -10236,6 +14017,10 @@ static const struct asm_opcode insns[] = #undef UE #undef UF #undef UT +#undef NUF +#undef nUF +#undef NCE +#undef nCE #undef OPS0 #undef OPS1 #undef OPS2 @@ -11147,7 +14932,7 @@ create_unwind_entry (int have_data) int tc_arm_regname_to_dw2regnum (const char *regname) { - int reg = arm_reg_parse ((char **) ®name, REG_TYPE_RN); + int reg = arm_reg_parse ((char **) ®name, REG_TYPE_RN, NULL); if (reg == FAIL) return -1; @@ -13339,7 +17124,9 @@ static const struct arm_cpu_option_table arm_cpus[] = {"arm1156t2f-s", ARM_ARCH_V6T2, FPU_ARCH_VFP_V2, NULL}, {"arm1176jz-s", ARM_ARCH_V6ZK, FPU_NONE, NULL}, {"arm1176jzf-s", ARM_ARCH_V6ZK, FPU_ARCH_VFP_V2, NULL}, - {"cortex-a8", ARM_ARCH_V7A, FPU_ARCH_VFP_V2, NULL}, + {"cortex-a8", ARM_ARCH_V7A, ARM_FEATURE(0, FPU_VFP_V3 + | FPU_NEON_EXT_V1), + NULL}, {"cortex-r4", ARM_ARCH_V7R, FPU_NONE, NULL}, {"cortex-m3", ARM_ARCH_V7M, FPU_NONE, NULL}, /* ??? XSCALE is really an architecture. 
*/ @@ -13429,6 +17216,7 @@ static const struct arm_option_cpu_value_table arm_fpus[] = {"softvfp+vfp", FPU_ARCH_VFP_V2}, {"vfp", FPU_ARCH_VFP_V2}, {"vfp9", FPU_ARCH_VFP_V2}, + {"vfp3", FPU_ARCH_VFP_V3}, {"vfp10", FPU_ARCH_VFP_V2}, {"vfp10-r0", FPU_ARCH_VFP_V1}, {"vfpxd", FPU_ARCH_VFP_V1xD}, @@ -13437,6 +17225,7 @@ static const struct arm_option_cpu_value_table arm_fpus[] = {"arm1136jfs", FPU_ARCH_VFP_V2}, {"arm1136jf-s", FPU_ARCH_VFP_V2}, {"maverick", FPU_ARCH_MAVERICK}, + {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1}, {NULL, ARM_ARCH_NONE} }; @@ -13832,7 +17621,7 @@ aeabi_set_public_attributes (void) ARM_MERGE_FEATURE_SETS (flags, arm_arch_used, thumb_arch_used); ARM_MERGE_FEATURE_SETS (flags, flags, *mfpu_opt); ARM_MERGE_FEATURE_SETS (flags, flags, selected_cpu); - + tmp = flags; arch = 0; for (p = cpu_arch_ver; p->val; p++) @@ -13877,16 +17666,25 @@ aeabi_set_public_attributes (void) elf32_arm_add_eabi_attr_int (stdoutput, 9, ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_arch_t2) ? 2 : 1); /* Tag_VFP_arch. */ - if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_arch_vfp_v2) - || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_arch_vfp_v2)) + if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v3) + || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v3)) + elf32_arm_add_eabi_attr_int (stdoutput, 10, 3); + else if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v2) + || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v2)) elf32_arm_add_eabi_attr_int (stdoutput, 10, 2); - else if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_arch_vfp_v1) - || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_arch_vfp_v1)) + else if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v1) + || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v1) + || ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v1xd) + || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v1xd)) elf32_arm_add_eabi_attr_int (stdoutput, 10, 1); /* Tag_WMMX_arch. */ if (ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_cext_iwmmxt) || ARM_CPU_HAS_FEATURE (arm_arch_used, arm_cext_iwmmxt)) elf32_arm_add_eabi_attr_int (stdoutput, 11, 1); + /* Tag_NEON_arch. */ + if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_neon_ext_v1) + || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_neon_ext_v1)) + elf32_arm_add_eabi_attr_int (stdoutput, 12, 1); } /* Add the .ARM.attributes section. */