target-arm queue:

* implementation of first part of the A64 Neon instruction set * v8 AArch32 rounding and 16<->64 fp conversion instructions * fix MIDR value on Zynq boards * some minor bugfixes/code cleanups -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) iQIcBAABCAAGBQJS67v6AAoJEDwlJe0UNgzeQfQP/26wGyhmrdBzbEa3OK2LRRHz 4zif7QXnIo9teo18ch0RaRthNAJX7NpfnhhCxrRWiw2RynvD/YrDwUKptxDZOzTx msVFL64ZdR+7xtTS0etYpMgUzF/MXwlq11I0dmod9eD1tETUaEcC8XJkGZDr6o16 2uTNjAfBjOBK0mDoYdKMJuz7d+qqhl8s/TRYTA50kWUERFfXxI03/axHtCkI7/fa aT8c2tkyVs8pqCmcs5BXU1Wn+8H15EeYIWIqGEB8sWTGsxfie95aIlqex2m1RtVI Rij231HimiQdVkTMVeI5Yxu400PeQ8igGbjDyLJB6UgzX/5otGKaKL1x2LI0ArxU oCjY3WV4OGnLYdSyMsSocemd/nBkeH2ntCDY3yl1XCqavR09UlgNgnffBAWq2qgA twmCXXGjj4B0KLECqAMtsLspVqSPaZDjrh6+lzMJB04xS0A7M0g+GPpioibg8XXR caGYbcRlCd8UwtLx8w1dnephiZtUTnits/C0bcHfmUwQ+JHGwLnCmRhuJnVze6mQ 62Of6A++P0+/1CofMxn+DqfZT6uwVM2gcexE2YLShnD3k147yWjivAoE8kpEDNSt kfOf6CcdAORw4Q3/1KLF59WYyvJQ5bwx9Tlg3BY+uQspBJooGL9klDV0J+DQc170 XoOYkaUPoHELWhW+nH9P =KIzb -----END PGP SIGNATURE----- Merge remote-tracking branch 'pmaydell/tags/pull-target-arm-20140131' into staging target-arm queue: * implementation of first part of the A64 Neon instruction set * v8 AArch32 rounding and 16<->64 fp conversion instructions * fix MIDR value on Zynq boards * some minor bugfixes/code cleanups # gpg: Signature made Fri 31 Jan 2014 15:06:34 GMT using RSA key ID 14360CDE # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" * pmaydell/tags/pull-target-arm-20140131: (34 commits) arm_gic: Fix GICD_ICPENDR and GICD_ISPENDR writes arm_gic: Introduce define for GIC_NR_SGIS target-arm: A64: Add SIMD shift by immediate target-arm: A64: Add simple SIMD 3-same floating point ops target-arm: A64: Add integer ops from SIMD 3-same group target-arm: A64: Add logic ops from SIMD 3 same group target-arm: A64: Add top level decode for SIMD 3-same group target-arm: A64: Add SIMD scalar 3 same add, sub and compare ops target-arm: A64: Add SIMD three-different ABDL instructions target-arm: A64: Add SIMD three-different multiply accumulate insns target-arm: Add AArch32 SIMD VCVTA, VCVTN, VCVTP and VCVTM target-arm: Add AArch32 FP VCVTA, VCVTN, VCVTP and VCVTM target-arm: Add AArch32 SIMD VRINTA, VRINTN, VRINTP, VRINTM, VRINTZ target-arm: Add set_neon_rmode helper target-arm: Add support for AArch32 SIMD VRINTX target-arm: Add support for AArch32 FP VRINTX target-arm: Add support for AArch32 FP VRINTZ target-arm: Add support for AArch32 FP VRINTR target-arm: Add AArch32 FP VRINTA, VRINTN, VRINTP and VRINTM target-arm: Move arm_rmode_to_sf to a shared location. ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2014-02-01 23:06:11 +00:00 · 2014-02-01 23:06:11 +00:00 · b4a8c9ae97
commit b4a8c9ae97
parent 850bbe1b94 5b0adce156
16 changed files with 3087 additions and 70 deletions
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@ -173,6 +173,11 @@ static void default_reset_secondary(ARMCPU *cpu,
    env->regs[15] = info->smp_loader_start;
 }

+static inline bool have_dtb(const struct arm_boot_info *info)
+{
+    return info->dtb_filename || info->get_dtb;
+}
+
 #define WRITE_WORD(p, value) do { \
    stl_phys_notdirty(p, value);  \
    p += 4;                       \
@ -421,7 +426,7 @@ static void do_cpu_reset(void *opaque)
                    env->regs[15] = info->loader_start;
                }

-                if (!info->dtb_filename) {
+                if (!have_dtb(info)) {
                    if (old_param) {
                        set_kernel_args_old(info);
                    } else {
@ -542,7 +547,7 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
        /* for device tree boot, we pass the DTB directly in r2. Otherwise
         * we point to the kernel args.
         */
-        if (info->dtb_filename || info->get_dtb) {
+        if (have_dtb(info)) {
            /* Place the DTB after the initrd in memory. Note that some
             * kernels will trash anything in the 4K page the initrd
             * ends in, so make sure the DTB isn't caught up in that.
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@ -37,6 +37,7 @@
 #define IRQ_OFFSET 32 /* pic interrupts start from index 32 */

 #define MPCORE_PERIPHBASE 0xF8F00000
+#define ZYNQ_BOARD_MIDR 0x413FC090

 static const int dma_irqs[8] = {
    46, 47, 48, 49, 72, 73, 74, 75
@ -125,6 +126,12 @@ static void zynq_init(QEMUMachineInitArgs *args)

    cpu = ARM_CPU(object_new(object_class_get_name(cpu_oc)));

+    object_property_set_int(OBJECT(cpu), ZYNQ_BOARD_MIDR, "midr", &err);
+    if (err) {
+        error_report("%s", error_get_pretty(err));
+        exit(1);
+    }
+
    object_property_set_int(OBJECT(cpu), MPCORE_PERIPHBASE, "reset-cbar", &err);
    if (err) {
        error_report("%s", error_get_pretty(err));
--- a/hw/display/blizzard_template.h
+++ b/hw/display/blizzard_template.h
@ -18,25 +18,35 @@
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

-#define SKIP_PIXEL(to)		to += deststep
+#define SKIP_PIXEL(to)         (to += deststep)
 #if DEPTH == 8
-# define PIXEL_TYPE		uint8_t
-# define COPY_PIXEL(to, from)	*to = from; SKIP_PIXEL(to)
-# define COPY_PIXEL1(to, from)	*to ++ = from
+# define PIXEL_TYPE            uint8_t
+# define COPY_PIXEL(to, from)  do { *to = from; SKIP_PIXEL(to); } while (0)
+# define COPY_PIXEL1(to, from) (*to++ = from)
 #elif DEPTH == 15 || DEPTH == 16
-# define PIXEL_TYPE		uint16_t
-# define COPY_PIXEL(to, from)	*to = from; SKIP_PIXEL(to)
-# define COPY_PIXEL1(to, from)	*to ++ = from
+# define PIXEL_TYPE            uint16_t
+# define COPY_PIXEL(to, from)  do { *to = from; SKIP_PIXEL(to); } while (0)
+# define COPY_PIXEL1(to, from) (*to++ = from)
 #elif DEPTH == 24
-# define PIXEL_TYPE		uint8_t
-# define COPY_PIXEL(to, from)	\
-    to[0] = from; to[1] = (from) >> 8; to[2] = (from) >> 16; SKIP_PIXEL(to)
-# define COPY_PIXEL1(to, from)	\
-    *to ++ = from; *to ++ = (from) >> 8; *to ++ = (from) >> 16
+# define PIXEL_TYPE            uint8_t
+# define COPY_PIXEL(to, from) \
+    do {                      \
+        to[0] = from;         \
+        to[1] = (from) >> 8;  \
+        to[2] = (from) >> 16; \
+        SKIP_PIXEL(to);       \
+    } while (0)
+
+# define COPY_PIXEL1(to, from) \
+    do {                       \
+        *to++ = from;          \
+        *to++ = (from) >> 8;   \
+        *to++ = (from) >> 16;  \
+    } while (0)
 #elif DEPTH == 32
-# define PIXEL_TYPE		uint32_t
-# define COPY_PIXEL(to, from)	*to = from; SKIP_PIXEL(to)
-# define COPY_PIXEL1(to, from)	*to ++ = from
+# define PIXEL_TYPE            uint32_t
+# define COPY_PIXEL(to, from)  do { *to = from; SKIP_PIXEL(to); } while (0)
+# define COPY_PIXEL1(to, from) (*to++ = from)
 #else
 # error unknown bit depth
 #endif
--- a/hw/display/pl110_template.h
+++ b/hw/display/pl110_template.h
@ -14,12 +14,16 @@
 #if BITS == 8
 #define COPY_PIXEL(to, from) *(to++) = from
 #elif BITS == 15 || BITS == 16
-#define COPY_PIXEL(to, from) *(uint16_t *)to = from; to += 2;
+#define COPY_PIXEL(to, from) do { *(uint16_t *)to = from; to += 2; } while (0)
 #elif BITS == 24
-#define COPY_PIXEL(to, from) \
-  *(to++) = from; *(to++) = (from) >> 8; *(to++) = (from) >> 16
+#define COPY_PIXEL(to, from)    \
+    do {                        \
+        *(to++) = from;         \
+        *(to++) = (from) >> 8;  \
+        *(to++) = (from) >> 16; \
+    } while (0)
 #elif BITS == 32
-#define COPY_PIXEL(to, from) *(uint32_t *)to = from; to += 4;
+#define COPY_PIXEL(to, from) do { *(uint32_t *)to = from; to += 4; } while (0)
 #else
 #error unknown bit depth
 #endif
--- a/hw/display/pxa2xx_template.h
+++ b/hw/display/pxa2xx_template.h
@ -11,14 +11,26 @@

 # define SKIP_PIXEL(to)		to += deststep
 #if BITS == 8
-# define COPY_PIXEL(to, from)	*to = from; SKIP_PIXEL(to)
+# define COPY_PIXEL(to, from)  do { *to = from; SKIP_PIXEL(to); } while (0)
 #elif BITS == 15 || BITS == 16
-# define COPY_PIXEL(to, from)	*(uint16_t *) to = from; SKIP_PIXEL(to)
+# define COPY_PIXEL(to, from)    \
+    do {                         \
+        *(uint16_t *) to = from; \
+        SKIP_PIXEL(to);          \
+    } while (0)
 #elif BITS == 24
-# define COPY_PIXEL(to, from)	\
-	*(uint16_t *) to = from; *(to + 2) = (from) >> 16; SKIP_PIXEL(to)
+# define COPY_PIXEL(to, from)     \
+    do {                          \
+        *(uint16_t *) to = from;  \
+        *(to + 2) = (from) >> 16; \
+        SKIP_PIXEL(to);           \
+    } while (0)
 #elif BITS == 32
-# define COPY_PIXEL(to, from)	*(uint32_t *) to = from; SKIP_PIXEL(to)
+# define COPY_PIXEL(to, from)    \
+    do {                         \
+        *(uint32_t *) to = from; \
+        SKIP_PIXEL(to);          \
+    } while (0)
 #else
 # error unknown bit depth
 #endif
--- a/hw/display/tc6393xb_template.h
+++ b/hw/display/tc6393xb_template.h
@ -22,14 +22,18 @@
 */

 #if BITS == 8
-# define SET_PIXEL(addr, color)	*(uint8_t*)addr = color;
+# define SET_PIXEL(addr, color)  (*(uint8_t *)addr = color)
 #elif BITS == 15 || BITS == 16
-# define SET_PIXEL(addr, color)	*(uint16_t*)addr = color;
+# define SET_PIXEL(addr, color)  (*(uint16_t *)addr = color)
 #elif BITS == 24
-# define SET_PIXEL(addr, color)	\
-    addr[0] = color; addr[1] = (color) >> 8; addr[2] = (color) >> 16;
+# define SET_PIXEL(addr, color)  \
+    do {                         \
+        addr[0] = color;         \
+        addr[1] = (color) >> 8;  \
+        addr[2] = (color) >> 16; \
+    } while (0)
 #elif BITS == 32
-# define SET_PIXEL(addr, color)	*(uint32_t*)addr = color;
+# define SET_PIXEL(addr, color)  (*(uint32_t *)addr = color)
 #else
 # error unknown bit depth
 #endif
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@ -380,8 +380,10 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
        irq = (offset - 0x100) * 8 + GIC_BASE_IRQ;
        if (irq >= s->num_irq)
            goto bad_reg;
-        if (irq < 16)
-          value = 0xff;
+        if (irq < GIC_NR_SGIS) {
+            value = 0xff;
+        }
+
        for (i = 0; i < 8; i++) {
            if (value & (1 << i)) {
                int mask =
@ -406,8 +408,10 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
        irq = (offset - 0x180) * 8 + GIC_BASE_IRQ;
        if (irq >= s->num_irq)
            goto bad_reg;
-        if (irq < 16)
-          value = 0;
+        if (irq < GIC_NR_SGIS) {
+            value = 0;
+        }
+
        for (i = 0; i < 8; i++) {
            if (value & (1 << i)) {
                int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
@ -423,8 +427,9 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
        irq = (offset - 0x200) * 8 + GIC_BASE_IRQ;
        if (irq >= s->num_irq)
            goto bad_reg;
-        if (irq < 16)
-          irq = 0;
+        if (irq < GIC_NR_SGIS) {
+            value = 0;
+        }

        for (i = 0; i < 8; i++) {
            if (value & (1 << i)) {
@ -436,6 +441,10 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
        irq = (offset - 0x280) * 8 + GIC_BASE_IRQ;
        if (irq >= s->num_irq)
            goto bad_reg;
+        if (irq < GIC_NR_SGIS) {
+            value = 0;
+        }
+
        for (i = 0; i < 8; i++) {
            /* ??? This currently clears the pending bit for all CPUs, even
               for per-CPU interrupts.  It's unclear whether this is the
--- a/include/hw/intc/arm_gic_common.h
+++ b/include/hw/intc/arm_gic_common.h
@ -27,6 +27,7 @@
 #define GIC_MAXIRQ 1020
 /* First 32 are private to each CPU (SGIs and PPIs). */
 #define GIC_INTERNAL 32
+#define GIC_NR_SGIS 16
 /* Maximum number of possible CPU interfaces, determined by GIC architecture */
 #define GIC_NCPU 8

--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@ -982,6 +982,7 @@ static const ARMCPUInfo arm_cpus[] = {

 static Property arm_cpu_properties[] = {
    DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
+    DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
    DEFINE_PROP_END_OF_LIST()
 };

--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@ -496,6 +496,8 @@ enum arm_fprounding {
    FPROUNDING_ODD
 };

+int arm_rmode_to_sf(int rmode);
+
 enum arm_cpu_mode {
  ARM_CPU_MODE_USR = 0x10,
  ARM_CPU_MODE_FIQ = 0x11,
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@ -122,3 +122,34 @@ uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
 {
    return float_rel_to_flags(float64_compare(x, y, fp_status));
 }
+
+uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
+                          uint32_t rn, uint32_t numregs)
+{
+    /* Helper function for SIMD TBL and TBX. We have to do the table
+     * lookup part for the 64 bits worth of indices we're passed in.
+     * result is the initial results vector (either zeroes for TBL
+     * or some guest values for TBX), rn the register number where
+     * the table starts, and numregs the number of registers in the table.
+     * We return the results of the lookups.
+     */
+    int shift;
+
+    for (shift = 0; shift < 64; shift += 8) {
+        int index = extract64(indices, shift, 8);
+        if (index < 16 * numregs) {
+            /* Convert index (a byte offset into the virtual table
+             * which is a series of 128-bit vectors concatenated)
+             * into the correct vfp.regs[] element plus a bit offset
+             * into that element, bearing in mind that the table
+             * can wrap around from V31 to V0.
+             */
+            int elt = (rn * 2 + (index >> 3)) % 64;
+            int bitidx = (index & 7) * 8;
+            uint64_t val = extract64(env->vfp.regs[elt], bitidx, 8);
+
+            result = deposit64(result, shift, 8, val);
+        }
+    }
+    return result;
+}
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@ -26,3 +26,4 @@ DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
 DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
 DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
 DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
+DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@ -4048,6 +4048,23 @@ uint32_t HELPER(set_rmode)(uint32_t rmode, CPUARMState *env)
    return prev_rmode;
 }

+/* Set the current fp rounding mode in the standard fp status and return
+ * the old one. This is for NEON instructions that need to change the
+ * rounding mode but wish to use the standard FPSCR values for everything
+ * else. Always set the rounding mode back to the correct value after
+ * modifying it.
+ * The argument is a softfloat float_round_ value.
+ */
+uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
+{
+    float_status *fp_status = &env->vfp.standard_fp_status;
+
+    uint32_t prev_rmode = get_float_rounding_mode(fp_status);
+    set_float_rounding_mode(rmode, fp_status);
+
+    return prev_rmode;
+}
+
 /* Half precision conversions.  */
 static float32 do_fcvt_f16_to_f32(uint32_t a, CPUARMState *env, float_status *s)
 {
@ -4418,3 +4435,31 @@ float64 HELPER(rintd)(float64 x, void *fp_status)

    return ret;
 }
+
+/* Convert ARM rounding mode to softfloat */
+int arm_rmode_to_sf(int rmode)
+{
+    switch (rmode) {
+    case FPROUNDING_TIEAWAY:
+        rmode = float_round_ties_away;
+        break;
+    case FPROUNDING_ODD:
+        /* FIXME: add support for TIEAWAY and ODD */
+        qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
+                      rmode);
+    case FPROUNDING_TIEEVEN:
+    default:
+        rmode = float_round_nearest_even;
+        break;
+    case FPROUNDING_POSINF:
+        rmode = float_round_up;
+        break;
+    case FPROUNDING_NEGINF:
+        rmode = float_round_down;
+        break;
+    case FPROUNDING_ZERO:
+        rmode = float_round_to_zero;
+        break;
+    }
+    return rmode;
+}
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@ -149,6 +149,7 @@ DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
 DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr)

 DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, env)
+DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env)

 DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
 DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@ -2759,6 +2759,113 @@ static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
    return 0;
 }

+static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
+                        int rounding)
+{
+    TCGv_ptr fpst = get_fpstatus_ptr(0);
+    TCGv_i32 tcg_rmode;
+
+    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
+    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+
+    if (dp) {
+        TCGv_i64 tcg_op;
+        TCGv_i64 tcg_res;
+        tcg_op = tcg_temp_new_i64();
+        tcg_res = tcg_temp_new_i64();
+        tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
+        gen_helper_rintd(tcg_res, tcg_op, fpst);
+        tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
+        tcg_temp_free_i64(tcg_op);
+        tcg_temp_free_i64(tcg_res);
+    } else {
+        TCGv_i32 tcg_op;
+        TCGv_i32 tcg_res;
+        tcg_op = tcg_temp_new_i32();
+        tcg_res = tcg_temp_new_i32();
+        tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
+        gen_helper_rints(tcg_res, tcg_op, fpst);
+        tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
+        tcg_temp_free_i32(tcg_op);
+        tcg_temp_free_i32(tcg_res);
+    }
+
+    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+    tcg_temp_free_i32(tcg_rmode);
+
+    tcg_temp_free_ptr(fpst);
+    return 0;
+}
+
+static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
+                       int rounding)
+{
+    bool is_signed = extract32(insn, 7, 1);
+    TCGv_ptr fpst = get_fpstatus_ptr(0);
+    TCGv_i32 tcg_rmode, tcg_shift;
+
+    tcg_shift = tcg_const_i32(0);
+
+    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
+    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+
+    if (dp) {
+        TCGv_i64 tcg_double, tcg_res;
+        TCGv_i32 tcg_tmp;
+        /* Rd is encoded as a single precision register even when the source
+         * is double precision.
+         */
+        rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
+        tcg_double = tcg_temp_new_i64();
+        tcg_res = tcg_temp_new_i64();
+        tcg_tmp = tcg_temp_new_i32();
+        tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
+        if (is_signed) {
+            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
+        } else {
+            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
+        }
+        tcg_gen_trunc_i64_i32(tcg_tmp, tcg_res);
+        tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
+        tcg_temp_free_i32(tcg_tmp);
+        tcg_temp_free_i64(tcg_res);
+        tcg_temp_free_i64(tcg_double);
+    } else {
+        TCGv_i32 tcg_single, tcg_res;
+        tcg_single = tcg_temp_new_i32();
+        tcg_res = tcg_temp_new_i32();
+        tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
+        if (is_signed) {
+            gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
+        } else {
+            gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
+        }
+        tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
+        tcg_temp_free_i32(tcg_res);
+        tcg_temp_free_i32(tcg_single);
+    }
+
+    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+    tcg_temp_free_i32(tcg_rmode);
+
+    tcg_temp_free_i32(tcg_shift);
+
+    tcg_temp_free_ptr(fpst);
+
+    return 0;
+}
+
+/* Table for converting the most common AArch32 encoding of
+ * rounding mode to arm_fprounding order (which matches the
+ * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
+ */
+static const uint8_t fp_decode_rm[] = {
+    FPROUNDING_TIEAWAY,
+    FPROUNDING_TIEEVEN,
+    FPROUNDING_POSINF,
+    FPROUNDING_NEGINF,
+};
+
 static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
 {
    uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
@ -2781,6 +2888,14 @@ static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
        return handle_vsel(insn, rd, rn, rm, dp);
    } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
        return handle_vminmaxnm(insn, rd, rn, rm, dp);
+    } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
+        /* VRINTA, VRINTN, VRINTP, VRINTM */
+        int rounding = fp_decode_rm[extract32(insn, 16, 2)];
+        return handle_vrint(insn, rd, rm, dp, rounding);
+    } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
+        /* VCVTA, VCVTN, VCVTP, VCVTM */
+        int rounding = fp_decode_rm[extract32(insn, 16, 2)];
+        return handle_vcvt(insn, rd, rm, dp, rounding);
    }
    return 1;
 }
@ -3325,6 +3440,44 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                        gen_vfp_F1_ld0(dp);
                        gen_vfp_cmpe(dp);
                        break;
+                    case 12: /* vrintr */
+                    {
+                        TCGv_ptr fpst = get_fpstatus_ptr(0);
+                        if (dp) {
+                            gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
+                        } else {
+                            gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
+                        }
+                        tcg_temp_free_ptr(fpst);
+                        break;
+                    }
+                    case 13: /* vrintz */
+                    {
+                        TCGv_ptr fpst = get_fpstatus_ptr(0);
+                        TCGv_i32 tcg_rmode;
+                        tcg_rmode = tcg_const_i32(float_round_to_zero);
+                        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+                        if (dp) {
+                            gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
+                        } else {
+                            gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
+                        }
+                        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+                        tcg_temp_free_i32(tcg_rmode);
+                        tcg_temp_free_ptr(fpst);
+                        break;
+                    }
+                    case 14: /* vrintx */
+                    {
+                        TCGv_ptr fpst = get_fpstatus_ptr(0);
+                        if (dp) {
+                            gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
+                        } else {
+                            gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
+                        }
+                        tcg_temp_free_ptr(fpst);
+                        break;
+                    }
                    case 15: /* single<->double conversion */
                        if (dp)
                            gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
@ -4617,8 +4770,22 @@ static const uint8_t neon_3r_sizes[] = {
 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
 #define NEON_2RM_VSHLL 38
+#define NEON_2RM_VRINTN 40
+#define NEON_2RM_VRINTX 41
+#define NEON_2RM_VRINTA 42
+#define NEON_2RM_VRINTZ 43
 #define NEON_2RM_VCVT_F16_F32 44
+#define NEON_2RM_VRINTM 45
 #define NEON_2RM_VCVT_F32_F16 46
+#define NEON_2RM_VRINTP 47
+#define NEON_2RM_VCVTAU 48
+#define NEON_2RM_VCVTAS 49
+#define NEON_2RM_VCVTNU 50
+#define NEON_2RM_VCVTNS 51
+#define NEON_2RM_VCVTPU 52
+#define NEON_2RM_VCVTPS 53
+#define NEON_2RM_VCVTMU 54
+#define NEON_2RM_VCVTMS 55
 #define NEON_2RM_VRECPE 56
 #define NEON_2RM_VRSQRTE 57
 #define NEON_2RM_VRECPE_F 58
@ -4632,6 +4799,9 @@ static int neon_2rm_is_float_op(int op)
 {
    /* Return true if this neon 2reg-misc op is float-to-float */
    return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
+            (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
+            op == NEON_2RM_VRINTM ||
+            (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
            op >= NEON_2RM_VRECPE_F);
 }

@ -4676,8 +4846,22 @@ static const uint8_t neon_2rm_sizes[] = {
    [NEON_2RM_VMOVN] = 0x7,
    [NEON_2RM_VQMOVN] = 0x7,
    [NEON_2RM_VSHLL] = 0x7,
+    [NEON_2RM_VRINTN] = 0x4,
+    [NEON_2RM_VRINTX] = 0x4,
+    [NEON_2RM_VRINTA] = 0x4,
+    [NEON_2RM_VRINTZ] = 0x4,
    [NEON_2RM_VCVT_F16_F32] = 0x2,
+    [NEON_2RM_VRINTM] = 0x4,
    [NEON_2RM_VCVT_F32_F16] = 0x2,
+    [NEON_2RM_VRINTP] = 0x4,
+    [NEON_2RM_VCVTAU] = 0x4,
+    [NEON_2RM_VCVTAS] = 0x4,
+    [NEON_2RM_VCVTNU] = 0x4,
+    [NEON_2RM_VCVTNS] = 0x4,
+    [NEON_2RM_VCVTPU] = 0x4,
+    [NEON_2RM_VCVTPS] = 0x4,
+    [NEON_2RM_VCVTMU] = 0x4,
+    [NEON_2RM_VCVTMS] = 0x4,
    [NEON_2RM_VRECPE] = 0x4,
    [NEON_2RM_VRSQRTE] = 0x4,
    [NEON_2RM_VRECPE_F] = 0x4,
@ -6388,6 +6572,73 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                            }
                            neon_store_reg(rm, pass, tmp2);
                            break;
+                        case NEON_2RM_VRINTN:
+                        case NEON_2RM_VRINTA:
+                        case NEON_2RM_VRINTM:
+                        case NEON_2RM_VRINTP:
+                        case NEON_2RM_VRINTZ:
+                        {
+                            TCGv_i32 tcg_rmode;
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            int rmode;
+
+                            if (op == NEON_2RM_VRINTZ) {
+                                rmode = FPROUNDING_ZERO;
+                            } else {
+                                rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
+                            }
+
+                            tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
+                                                      cpu_env);
+                            gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
+                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
+                                                      cpu_env);
+                            tcg_temp_free_ptr(fpstatus);
+                            tcg_temp_free_i32(tcg_rmode);
+                            break;
+                        }
+                        case NEON_2RM_VRINTX:
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
+                            break;
+                        }
+                        case NEON_2RM_VCVTAU:
+                        case NEON_2RM_VCVTAS:
+                        case NEON_2RM_VCVTNU:
+                        case NEON_2RM_VCVTNS:
+                        case NEON_2RM_VCVTPU:
+                        case NEON_2RM_VCVTPS:
+                        case NEON_2RM_VCVTMU:
+                        case NEON_2RM_VCVTMS:
+                        {
+                            bool is_signed = !extract32(insn, 7, 1);
+                            TCGv_ptr fpst = get_fpstatus_ptr(1);
+                            TCGv_i32 tcg_rmode, tcg_shift;
+                            int rmode = fp_decode_rm[extract32(insn, 8, 2)];
+
+                            tcg_shift = tcg_const_i32(0);
+                            tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
+                                                      cpu_env);
+
+                            if (is_signed) {
+                                gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
+                                                     tcg_shift, fpst);
+                            } else {
+                                gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
+                                                     tcg_shift, fpst);
+                            }
+
+                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
+                                                      cpu_env);
+                            tcg_temp_free_i32(tcg_rmode);
+                            tcg_temp_free_i32(tcg_shift);
+                            tcg_temp_free_ptr(fpst);
+                            break;
+                        }
                        case NEON_2RM_VRECPE:
                            gen_helper_recpe_u32(tmp, tmp, cpu_env);
                            break;