From 873169022aa58daabd10979002f8009c7e5f3f05 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:48 +0000
Subject: [PATCH 01/30] hw/intc/arm_gic: Fix NVIC assertion failure

Commit 40d225009ef accidentally changed the behaviour of
gic_acknowledge_irq() for the NVIC. The NVIC doesn't have SGIs,
so this meant we hit an assertion:
  gic_acknowledge_irq: Assertion `s->sgi_pending[irq][cpu] != 0' failed.

Return NVIC acknowledge-irq to its previous behaviour, like 11MPCore.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 hw/intc/arm_gic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index 93eaa6b2fa..955b8d4945 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -189,7 +189,7 @@ uint32_t gic_acknowledge_irq(GICState *s, int cpu)
     }
     s->last_active[irq][cpu] = s->running_irq[cpu];
 
-    if (s->revision == REV_11MPCORE) {
+    if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
         /* Clear pending flags for both level and edge triggered interrupts.
          * Level triggered IRQs will be reasserted once they become inactive.
          */

From f5e51e7f10d6dbbeac268a7defc89831c62eff12 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:48 +0000
Subject: [PATCH 02/30] target-arm: A64: Implement plain vector SIMD indexed
 element insns

Implement all the SIMD vector x indexed element instructions
in the subcategory which are not 'long' ops.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/helper-a64.c    |  26 ++++
 target-arm/helper-a64.h    |   2 +
 target-arm/translate-a64.c | 248 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 275 insertions(+), 1 deletion(-)

diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index 6ca958afb1..fe90a5c2d4 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -123,6 +123,32 @@ uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
     return float_rel_to_flags(float64_compare(x, y, fp_status));
 }
 
+float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+
+    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
+        (float32_is_infinity(a) && float32_is_zero(b))) {
+        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+        return make_float32((1U << 30) |
+                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
+    }
+    return float32_mul(a, b, fpst);
+}
+
+float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+
+    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
+        (float64_is_infinity(a) && float64_is_zero(b))) {
+        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+        return make_float64((1ULL << 62) |
+                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
+    }
+    return float64_mul(a, b, fpst);
+}
+
 uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
                           uint32_t rn, uint32_t numregs)
 {
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index 99832ee55e..84310e874b 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -27,3 +27,5 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
 DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
 DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
 DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
+DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
+DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index d60223af8e..a96ee4aa4b 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7813,7 +7813,253 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
  */
 static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    /* This encoding has two kinds of instruction:
+     *  normal, where we perform elt x idxelt => elt for each
+     *     element in the vector
+     *  long, where we perform elt x idxelt and generate a result of
+     *     double the width of the input element
+     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
+     */
+    bool is_q = extract32(insn, 30, 1);
+    bool u = extract32(insn, 29, 1);
+    int size = extract32(insn, 22, 2);
+    int l = extract32(insn, 21, 1);
+    int m = extract32(insn, 20, 1);
+    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
+    int rm = extract32(insn, 16, 4);
+    int opcode = extract32(insn, 12, 4);
+    int h = extract32(insn, 11, 1);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    bool is_long = false;
+    bool is_fp = false;
+    int index;
+    TCGv_ptr fpst;
+
+    switch (opcode) {
+    case 0x0: /* MLA */
+    case 0x4: /* MLS */
+        if (!u) {
+            unallocated_encoding(s);
+            return;
+        }
+        break;
+    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
+        is_long = true;
+        break;
+    case 0x3: /* SQDMLAL, SQDMLAL2 */
+    case 0x7: /* SQDMLSL, SQDMLSL2 */
+    case 0xb: /* SQDMULL, SQDMULL2 */
+        is_long = true;
+        /* fall through */
+    case 0xc: /* SQDMULH */
+    case 0xd: /* SQRDMULH */
+    case 0x8: /* MUL */
+        if (u) {
+            unallocated_encoding(s);
+            return;
+        }
+        break;
+    case 0x1: /* FMLA */
+    case 0x5: /* FMLS */
+        if (u) {
+            unallocated_encoding(s);
+            return;
+        }
+        /* fall through */
+    case 0x9: /* FMUL, FMULX */
+        if (!extract32(size, 1, 1)) {
+            unallocated_encoding(s);
+            return;
+        }
+        is_fp = true;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (is_fp) {
+        /* low bit of size indicates single/double */
+        size = extract32(size, 0, 1) ? 3 : 2;
+        if (size == 2) {
+            index = h << 1 | l;
+        } else {
+            if (l || !is_q) {
+                unallocated_encoding(s);
+                return;
+            }
+            index = h;
+        }
+        rm |= (m << 4);
+    } else {
+        switch (size) {
+        case 1:
+            index = h << 2 | l << 1 | m;
+            break;
+        case 2:
+            index = h << 1 | l;
+            rm |= (m << 4);
+            break;
+        default:
+            unallocated_encoding(s);
+            return;
+        }
+    }
+
+    if (is_long) {
+        unsupported_encoding(s, insn);
+        return;
+    }
+
+    if (is_fp) {
+        fpst = get_fpstatus_ptr();
+    } else {
+        TCGV_UNUSED_PTR(fpst);
+    }
+
+    if (size == 3) {
+        TCGv_i64 tcg_idx = tcg_temp_new_i64();
+        int pass;
+
+        assert(is_fp && is_q && !is_long);
+
+        read_vec_element(s, tcg_idx, rm, index, MO_64);
+
+        for (pass = 0; pass < 2; pass++) {
+            TCGv_i64 tcg_op = tcg_temp_new_i64();
+            TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+            read_vec_element(s, tcg_op, rn, pass, MO_64);
+
+            switch (opcode) {
+            case 0x5: /* FMLS */
+                /* As usual for ARM, separate negation for fused multiply-add */
+                gen_helper_vfp_negd(tcg_op, tcg_op);
+                /* fall through */
+            case 0x1: /* FMLA */
+                read_vec_element(s, tcg_res, rd, pass, MO_64);
+                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+                break;
+            case 0x9: /* FMUL, FMULX */
+                if (u) {
+                    gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
+                } else {
+                    gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
+                }
+                break;
+            default:
+                g_assert_not_reached();
+            }
+
+            write_vec_element(s, tcg_res, rd, pass, MO_64);
+            tcg_temp_free_i64(tcg_op);
+            tcg_temp_free_i64(tcg_res);
+        }
+
+        tcg_temp_free_i64(tcg_idx);
+    } else if (!is_long) {
+        /* 32 bit floating point, or 16 or 32 bit integer */
+        TCGv_i32 tcg_idx = tcg_temp_new_i32();
+        int pass;
+
+        read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+        if (size == 1) {
+            /* The simplest way to handle the 16x16 indexed ops is to duplicate
+             * the index into both halves of the 32 bit tcg_idx and then use
+             * the usual Neon helpers.
+             */
+            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+        }
+
+        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
+            TCGv_i32 tcg_op = tcg_temp_new_i32();
+            TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+
+            switch (opcode) {
+            case 0x0: /* MLA */
+            case 0x4: /* MLS */
+            case 0x8: /* MUL */
+            {
+                static NeonGenTwoOpFn * const fns[2][2] = {
+                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
+                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
+                };
+                NeonGenTwoOpFn *genfn;
+                bool is_sub = opcode == 0x4;
+
+                if (size == 1) {
+                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
+                } else {
+                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
+                }
+                if (opcode == 0x8) {
+                    break;
+                }
+                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+                genfn = fns[size - 1][is_sub];
+                genfn(tcg_res, tcg_op, tcg_res);
+                break;
+            }
+            case 0x5: /* FMLS */
+                /* As usual for ARM, separate negation for fused multiply-add */
+                gen_helper_vfp_negs(tcg_op, tcg_op);
+                /* fall through */
+            case 0x1: /* FMLA */
+                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+                break;
+            case 0x9: /* FMUL, FMULX */
+                if (u) {
+                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
+                } else {
+                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+                }
+                break;
+            case 0xc: /* SQDMULH */
+                if (size == 1) {
+                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
+                                               tcg_op, tcg_idx);
+                } else {
+                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
+                                               tcg_op, tcg_idx);
+                }
+                break;
+            case 0xd: /* SQRDMULH */
+                if (size == 1) {
+                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
+                                                tcg_op, tcg_idx);
+                } else {
+                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
+                                                tcg_op, tcg_idx);
+                }
+                break;
+            default:
+                g_assert_not_reached();
+            }
+
+            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+            tcg_temp_free_i32(tcg_op);
+            tcg_temp_free_i32(tcg_res);
+        }
+
+        tcg_temp_free_i32(tcg_idx);
+
+        if (!is_q) {
+            clear_vec_high(s, rd);
+        }
+    } else {
+        /* long ops: 16x16->32 or 32x32->64 */
+    }
+
+    if (!TCGV_IS_UNUSED_PTR(fpst)) {
+        tcg_temp_free_ptr(fpst);
+    }
 }
 
 /* C3.6.19 Crypto AES

From c44ad1fddcf5a3deea3fb5cc340935bb11ccfb8e Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:49 +0000
Subject: [PATCH 03/30] target-arm: A64: Implement long vector x indexed insns

Implement the 'long' operations in the vector x indexed
element category.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 144 +++++++++++++++++++++++++++++++++++--
 1 file changed, 139 insertions(+), 5 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index a96ee4aa4b..f1cd08aa7e 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7909,11 +7909,6 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
         }
     }
 
-    if (is_long) {
-        unsupported_encoding(s, insn);
-        return;
-    }
-
     if (is_fp) {
         fpst = get_fpstatus_ptr();
     } else {
@@ -8055,6 +8050,145 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
         }
     } else {
         /* long ops: 16x16->32 or 32x32->64 */
+        TCGv_i64 tcg_res[2];
+        int pass;
+        bool satop = extract32(opcode, 0, 1);
+        TCGMemOp memop = MO_32;
+
+        if (satop || !u) {
+            memop |= MO_SIGN;
+        }
+
+        if (size == 2) {
+            TCGv_i64 tcg_idx = tcg_temp_new_i64();
+
+            read_vec_element(s, tcg_idx, rm, index, memop);
+
+            for (pass = 0; pass < 2; pass++) {
+                TCGv_i64 tcg_op = tcg_temp_new_i64();
+                TCGv_i64 tcg_passres;
+
+                read_vec_element(s, tcg_op, rn, pass + (is_q * 2), memop);
+
+                tcg_res[pass] = tcg_temp_new_i64();
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    /* Non-accumulating ops */
+                    tcg_passres = tcg_res[pass];
+                } else {
+                    tcg_passres = tcg_temp_new_i64();
+                }
+
+                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
+                tcg_temp_free_i64(tcg_op);
+
+                if (satop) {
+                    /* saturating, doubling */
+                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+                                                      tcg_passres, tcg_passres);
+                }
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    continue;
+                }
+
+                /* Accumulating op: handle accumulate step */
+                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+                switch (opcode) {
+                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+                    break;
+                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+                    break;
+                case 0x7: /* SQDMLSL, SQDMLSL2 */
+                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
+                    /* fall through */
+                case 0x3: /* SQDMLAL, SQDMLAL2 */
+                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+                                                      tcg_res[pass],
+                                                      tcg_passres);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
+                tcg_temp_free_i64(tcg_passres);
+            }
+            tcg_temp_free_i64(tcg_idx);
+        } else {
+            TCGv_i32 tcg_idx = tcg_temp_new_i32();
+
+            assert(size == 1);
+            read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+            /* The simplest way to handle the 16x16 indexed ops is to duplicate
+             * the index into both halves of the 32 bit tcg_idx and then use
+             * the usual Neon helpers.
+             */
+            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+
+            for (pass = 0; pass < 2; pass++) {
+                TCGv_i32 tcg_op = tcg_temp_new_i32();
+                TCGv_i64 tcg_passres;
+
+                read_vec_element_i32(s, tcg_op, rn, pass + (is_q * 2), MO_32);
+                tcg_res[pass] = tcg_temp_new_i64();
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    /* Non-accumulating ops */
+                    tcg_passres = tcg_res[pass];
+                } else {
+                    tcg_passres = tcg_temp_new_i64();
+                }
+
+                if (memop & MO_SIGN) {
+                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
+                } else {
+                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
+                }
+                if (satop) {
+                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+                                                      tcg_passres, tcg_passres);
+                }
+                tcg_temp_free_i32(tcg_op);
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    continue;
+                }
+
+                /* Accumulating op: handle accumulate step */
+                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+                switch (opcode) {
+                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
+                                             tcg_passres);
+                    break;
+                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
+                                             tcg_passres);
+                    break;
+                case 0x7: /* SQDMLSL, SQDMLSL2 */
+                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+                    /* fall through */
+                case 0x3: /* SQDMLAL, SQDMLAL2 */
+                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+                                                      tcg_res[pass],
+                                                      tcg_passres);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
+                tcg_temp_free_i64(tcg_passres);
+            }
+            tcg_temp_free_i32(tcg_idx);
+        }
+
+        for (pass = 0; pass < 2; pass++) {
+            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+            tcg_temp_free_i64(tcg_res[pass]);
+        }
     }
 
     if (!TCGV_IS_UNUSED_PTR(fpst)) {

From 9f82e0ff4b21b3fce86115597e92b01fba448635 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:49 +0000
Subject: [PATCH 04/30] target-arm: A64: Implement SIMD scalar indexed
 instructions

Implement the SIMD scalar indexed instructions. The encoding
here is nearly identical to the vector indexed grouping, so
we combine the two.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 115 ++++++++++++++++++++++++++-----------
 1 file changed, 82 insertions(+), 33 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index f1cd08aa7e..a52a3e7b5f 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -6322,17 +6322,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
     }
 }
 
-/* C3.6.13 AdvSIMD scalar x indexed element
- *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- */
-static void disas_simd_scalar_indexed(DisasContext *s, uint32_t insn)
-{
-    unsupported_encoding(s, insn);
-}
-
 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                  int immh, int immb, int opcode, int rn, int rd)
@@ -7805,13 +7794,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
     }
 }
 
-/* C3.6.18 AdvSIMD vector x indexed element
+/* C3.6.13 AdvSIMD scalar x indexed element
+ *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
+ * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
+ * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * C3.6.18 AdvSIMD vector x indexed element
  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
  */
-static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
+static void disas_simd_indexed(DisasContext *s, uint32_t insn)
 {
     /* This encoding has two kinds of instruction:
      *  normal, where we perform elt x idxelt => elt for each
@@ -7820,6 +7814,7 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
      *     double the width of the input element
      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
      */
+    bool is_scalar = extract32(insn, 28, 1);
     bool is_q = extract32(insn, 30, 1);
     bool u = extract32(insn, 29, 1);
     int size = extract32(insn, 22, 2);
@@ -7839,7 +7834,7 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
     switch (opcode) {
     case 0x0: /* MLA */
     case 0x4: /* MLS */
-        if (!u) {
+        if (!u || is_scalar) {
             unallocated_encoding(s);
             return;
         }
@@ -7847,6 +7842,10 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
+        if (is_scalar) {
+            unallocated_encoding(s);
+            return;
+        }
         is_long = true;
         break;
     case 0x3: /* SQDMLAL, SQDMLAL2 */
@@ -7856,12 +7855,17 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
         /* fall through */
     case 0xc: /* SQDMULH */
     case 0xd: /* SQRDMULH */
-    case 0x8: /* MUL */
         if (u) {
             unallocated_encoding(s);
             return;
         }
         break;
+    case 0x8: /* MUL */
+        if (u || is_scalar) {
+            unallocated_encoding(s);
+            return;
+        }
+        break;
     case 0x1: /* FMLA */
     case 0x5: /* FMLS */
         if (u) {
@@ -7923,7 +7927,7 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
 
         read_vec_element(s, tcg_idx, rm, index, MO_64);
 
-        for (pass = 0; pass < 2; pass++) {
+        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
             TCGv_i64 tcg_op = tcg_temp_new_i64();
             TCGv_i64 tcg_res = tcg_temp_new_i64();
 
@@ -7954,15 +7958,28 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
             tcg_temp_free_i64(tcg_res);
         }
 
+        if (is_scalar) {
+            clear_vec_high(s, rd);
+        }
+
         tcg_temp_free_i64(tcg_idx);
     } else if (!is_long) {
-        /* 32 bit floating point, or 16 or 32 bit integer */
+        /* 32 bit floating point, or 16 or 32 bit integer.
+         * For the 16 bit scalar case we use the usual Neon helpers and
+         * rely on the fact that 0 op 0 == 0 with no side effects.
+         */
         TCGv_i32 tcg_idx = tcg_temp_new_i32();
-        int pass;
+        int pass, maxpasses;
+
+        if (is_scalar) {
+            maxpasses = 1;
+        } else {
+            maxpasses = is_q ? 4 : 2;
+        }
 
         read_vec_element_i32(s, tcg_idx, rm, index, size);
 
-        if (size == 1) {
+        if (size == 1 && !is_scalar) {
             /* The simplest way to handle the 16x16 indexed ops is to duplicate
              * the index into both halves of the 32 bit tcg_idx and then use
              * the usual Neon helpers.
@@ -7970,11 +7987,11 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
         }
 
-        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
+        for (pass = 0; pass < maxpasses; pass++) {
             TCGv_i32 tcg_op = tcg_temp_new_i32();
             TCGv_i32 tcg_res = tcg_temp_new_i32();
 
-            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
 
             switch (opcode) {
             case 0x0: /* MLA */
@@ -8038,7 +8055,12 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
                 g_assert_not_reached();
             }
 
-            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+            if (is_scalar) {
+                write_fp_sreg(s, rd, tcg_res);
+            } else {
+                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+            }
+
             tcg_temp_free_i32(tcg_op);
             tcg_temp_free_i32(tcg_res);
         }
@@ -8064,11 +8086,18 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
 
             read_vec_element(s, tcg_idx, rm, index, memop);
 
-            for (pass = 0; pass < 2; pass++) {
+            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                 TCGv_i64 tcg_op = tcg_temp_new_i64();
                 TCGv_i64 tcg_passres;
+                int passelt;
 
-                read_vec_element(s, tcg_op, rn, pass + (is_q * 2), memop);
+                if (is_scalar) {
+                    passelt = 0;
+                } else {
+                    passelt = pass + (is_q * 2);
+                }
+
+                read_vec_element(s, tcg_op, rn, passelt, memop);
 
                 tcg_res[pass] = tcg_temp_new_i64();
 
@@ -8116,23 +8145,35 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
                 tcg_temp_free_i64(tcg_passres);
             }
             tcg_temp_free_i64(tcg_idx);
+
+            if (is_scalar) {
+                clear_vec_high(s, rd);
+            }
         } else {
             TCGv_i32 tcg_idx = tcg_temp_new_i32();
 
             assert(size == 1);
             read_vec_element_i32(s, tcg_idx, rm, index, size);
 
-            /* The simplest way to handle the 16x16 indexed ops is to duplicate
-             * the index into both halves of the 32 bit tcg_idx and then use
-             * the usual Neon helpers.
-             */
-            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+            if (!is_scalar) {
+                /* The simplest way to handle the 16x16 indexed ops is to
+                 * duplicate the index into both halves of the 32 bit tcg_idx
+                 * and then use the usual Neon helpers.
+                 */
+                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+            }
 
-            for (pass = 0; pass < 2; pass++) {
+            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                 TCGv_i32 tcg_op = tcg_temp_new_i32();
                 TCGv_i64 tcg_passres;
 
-                read_vec_element_i32(s, tcg_op, rn, pass + (is_q * 2), MO_32);
+                if (is_scalar) {
+                    read_vec_element_i32(s, tcg_op, rn, pass, size);
+                } else {
+                    read_vec_element_i32(s, tcg_op, rn,
+                                         pass + (is_q * 2), MO_32);
+                }
+
                 tcg_res[pass] = tcg_temp_new_i64();
 
                 if (opcode == 0xa || opcode == 0xb) {
@@ -8183,6 +8224,14 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
                 tcg_temp_free_i64(tcg_passres);
             }
             tcg_temp_free_i32(tcg_idx);
+
+            if (is_scalar) {
+                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
+            }
+        }
+
+        if (is_scalar) {
+            tcg_res[1] = tcg_const_i64(0);
         }
 
         for (pass = 0; pass < 2; pass++) {
@@ -8241,7 +8290,7 @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
     { 0x0e000400, 0x9fe08400, disas_simd_copy },
-    { 0x0f000000, 0x9f000400, disas_simd_indexed_vector },
+    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
@@ -8253,7 +8302,7 @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
-    { 0x5f000000, 0xdf000400, disas_simd_scalar_indexed },
+    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },

From b033cd3d0021bee24931d0118fbd34e8c8d8b5af Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:49 +0000
Subject: [PATCH 05/30] target-arm: A64: Implement scalar three different
 instructions

Implement the scalar three different instruction group:
it only has three instructions in it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 95 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 94 insertions(+), 1 deletion(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index a52a3e7b5f..13f7f77c45 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -5838,7 +5838,100 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
  */
 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    bool is_u = extract32(insn, 29, 1);
+    int size = extract32(insn, 22, 2);
+    int opcode = extract32(insn, 12, 4);
+    int rm = extract32(insn, 16, 5);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+
+    if (is_u) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (opcode) {
+    case 0x9: /* SQDMLAL, SQDMLAL2 */
+    case 0xb: /* SQDMLSL, SQDMLSL2 */
+    case 0xd: /* SQDMULL, SQDMULL2 */
+        if (size == 0 || size == 3) {
+            unallocated_encoding(s);
+            return;
+        }
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (size == 2) {
+        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+        TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
+        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
+
+        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
+        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
+
+        switch (opcode) {
+        case 0xd: /* SQDMULL, SQDMULL2 */
+            break;
+        case 0xb: /* SQDMLSL, SQDMLSL2 */
+            tcg_gen_neg_i64(tcg_res, tcg_res);
+            /* fall through */
+        case 0x9: /* SQDMLAL, SQDMLAL2 */
+            read_vec_element(s, tcg_op1, rd, 0, MO_64);
+            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
+                                              tcg_res, tcg_op1);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        write_fp_dreg(s, rd, tcg_res);
+
+        tcg_temp_free_i64(tcg_op1);
+        tcg_temp_free_i64(tcg_op2);
+        tcg_temp_free_i64(tcg_res);
+    } else {
+        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+        TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
+        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
+
+        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
+        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
+
+        switch (opcode) {
+        case 0xd: /* SQDMULL, SQDMULL2 */
+            break;
+        case 0xb: /* SQDMLSL, SQDMLSL2 */
+            gen_helper_neon_negl_u32(tcg_res, tcg_res);
+            /* fall through */
+        case 0x9: /* SQDMLAL, SQDMLAL2 */
+        {
+            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
+            read_vec_element(s, tcg_op3, rd, 0, MO_32);
+            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
+                                              tcg_res, tcg_op3);
+            tcg_temp_free_i64(tcg_op3);
+            break;
+        }
+        default:
+            g_assert_not_reached();
+        }
+
+        tcg_gen_ext32u_i64(tcg_res, tcg_res);
+        write_fp_dreg(s, rd, tcg_res);
+
+        tcg_temp_free_i32(tcg_op1);
+        tcg_temp_free_i32(tcg_op2);
+        tcg_temp_free_i64(tcg_res);
+    }
 }
 
 static void handle_3same_64(DisasContext *s, int opcode, bool u,

From 8908f4d1850dbfd0de442e8deaed2f41821cdb89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Thu, 20 Feb 2014 10:35:49 +0000
Subject: [PATCH 06/30] target-arm: A64: Implement SIMD FP compare and set
 insns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds all forms of the SIMD floating point and set instructions:

  FCM(GT|GE|EQ|LE|LT)

Most of the heavy lifting is done by either the existing neon helpers or
some new helpers for the 64bit double cases. Most of the code paths are
common although the 2misc versions are a little special as they compare
against zero.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
[PMM: fixed some minor bugs, added the 2-misc-scalar encoding]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/helper-a64.c    |  19 ++++
 target-arm/helper-a64.h    |   3 +
 target-arm/translate-a64.c | 197 ++++++++++++++++++++++++++++++++++---
 3 files changed, 207 insertions(+), 12 deletions(-)

diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index fe90a5c2d4..b4cab51e62 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -179,3 +179,22 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
     }
     return result;
 }
+
+/* 64bit/double versions of the neon float compare functions */
+uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return -float64_eq_quiet(a, b, fpst);
+}
+
+uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return -float64_le(b, a, fpst);
+}
+
+uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return -float64_lt(b, a, fpst);
+}
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index 84310e874b..bf20466f50 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -29,3 +29,6 @@ DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
 DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
 DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
 DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
+DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
+DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
+DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 13f7f77c45..bcf32a12b4 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -77,6 +77,8 @@ typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
 
 /* initialize TCG globals.  */
 void a64_translate_init(void)
@@ -6049,6 +6051,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             case 0x1a: /* FADD */
                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x1c: /* FCMEQ */
+                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x1e: /* FMAX */
                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
@@ -6064,6 +6069,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             case 0x5b: /* FMUL */
                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x5c: /* FCMGE */
+                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x5f: /* FDIV */
                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
@@ -6071,6 +6079,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                 gen_helper_vfp_absd(tcg_res, tcg_res);
                 break;
+            case 0x7c: /* FCMGT */
+                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             default:
                 g_assert_not_reached();
             }
@@ -6093,6 +6104,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             case 0x1a: /* FADD */
                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x1c: /* FCMEQ */
+                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x1e: /* FMAX */
                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
@@ -6111,6 +6125,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             case 0x5b: /* FMUL */
                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x5c: /* FCMGE */
+                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x5f: /* FDIV */
                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
@@ -6118,6 +6135,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                 gen_helper_vfp_abss(tcg_res, tcg_res);
                 break;
+            case 0x7c: /* FCMGT */
+                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             default:
                 g_assert_not_reached();
             }
@@ -6168,15 +6188,15 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
         switch (fpopcode) {
         case 0x1b: /* FMULX */
-        case 0x1c: /* FCMEQ */
         case 0x1f: /* FRECPS */
         case 0x3f: /* FRSQRTS */
-        case 0x5c: /* FCMGE */
         case 0x5d: /* FACGE */
-        case 0x7c: /* FCMGT */
         case 0x7d: /* FACGT */
             unsupported_encoding(s, insn);
             return;
+        case 0x1c: /* FCMEQ */
+        case 0x5c: /* FCMGE */
+        case 0x7c: /* FCMGT */
         case 0x7a: /* FABD */
             break;
         default:
@@ -6361,6 +6381,115 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
     }
 }
 
+static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
+                                   bool is_scalar, bool is_u, bool is_q,
+                                   int size, int rn, int rd)
+{
+    bool is_double = (size == 3);
+    TCGv_ptr fpst = get_fpstatus_ptr();
+
+    if (is_double) {
+        TCGv_i64 tcg_op = tcg_temp_new_i64();
+        TCGv_i64 tcg_zero = tcg_const_i64(0);
+        TCGv_i64 tcg_res = tcg_temp_new_i64();
+        NeonGenTwoDoubleOPFn *genfn;
+        bool swap = false;
+        int pass;
+
+        switch (opcode) {
+        case 0x2e: /* FCMLT (zero) */
+            swap = true;
+            /* fallthrough */
+        case 0x2c: /* FCMGT (zero) */
+            genfn = gen_helper_neon_cgt_f64;
+            break;
+        case 0x2d: /* FCMEQ (zero) */
+            genfn = gen_helper_neon_ceq_f64;
+            break;
+        case 0x6d: /* FCMLE (zero) */
+            swap = true;
+            /* fall through */
+        case 0x6c: /* FCMGE (zero) */
+            genfn = gen_helper_neon_cge_f64;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+            read_vec_element(s, tcg_op, rn, pass, MO_64);
+            if (swap) {
+                genfn(tcg_res, tcg_zero, tcg_op, fpst);
+            } else {
+                genfn(tcg_res, tcg_op, tcg_zero, fpst);
+            }
+            write_vec_element(s, tcg_res, rd, pass, MO_64);
+        }
+        if (is_scalar) {
+            clear_vec_high(s, rd);
+        }
+
+        tcg_temp_free_i64(tcg_res);
+        tcg_temp_free_i64(tcg_zero);
+        tcg_temp_free_i64(tcg_op);
+    } else {
+        TCGv_i32 tcg_op = tcg_temp_new_i32();
+        TCGv_i32 tcg_zero = tcg_const_i32(0);
+        TCGv_i32 tcg_res = tcg_temp_new_i32();
+        NeonGenTwoSingleOPFn *genfn;
+        bool swap = false;
+        int pass, maxpasses;
+
+        switch (opcode) {
+        case 0x2e: /* FCMLT (zero) */
+            swap = true;
+            /* fall through */
+        case 0x2c: /* FCMGT (zero) */
+            genfn = gen_helper_neon_cgt_f32;
+            break;
+        case 0x2d: /* FCMEQ (zero) */
+            genfn = gen_helper_neon_ceq_f32;
+            break;
+        case 0x6d: /* FCMLE (zero) */
+            swap = true;
+            /* fall through */
+        case 0x6c: /* FCMGE (zero) */
+            genfn = gen_helper_neon_cge_f32;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        if (is_scalar) {
+            maxpasses = 1;
+        } else {
+            maxpasses = is_q ? 4 : 2;
+        }
+
+        for (pass = 0; pass < maxpasses; pass++) {
+            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+            if (swap) {
+                genfn(tcg_res, tcg_zero, tcg_op, fpst);
+            } else {
+                genfn(tcg_res, tcg_op, tcg_zero, fpst);
+            }
+            if (is_scalar) {
+                write_fp_sreg(s, rd, tcg_res);
+            } else {
+                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+            }
+        }
+        tcg_temp_free_i32(tcg_res);
+        tcg_temp_free_i32(tcg_zero);
+        tcg_temp_free_i32(tcg_op);
+        if (!is_q && !is_scalar) {
+            clear_vec_high(s, rd);
+        }
+    }
+
+    tcg_temp_free_ptr(fpst);
+}
+
 /* C3.6.12 AdvSIMD scalar two reg misc
  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
  * +-----+---+-----------+------+-----------+--------+-----+------+------+
@@ -6390,9 +6519,47 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
             return;
         }
         break;
+    case 0xc ... 0xf:
+    case 0x16 ... 0x1d:
+    case 0x1f:
+        /* Floating point: U, size[1] and opcode indicate operation;
+         * size[0] indicates single or double precision.
+         */
+        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
+        size = extract32(size, 0, 1) ? 3 : 2;
+        switch (opcode) {
+        case 0x2c: /* FCMGT (zero) */
+        case 0x2d: /* FCMEQ (zero) */
+        case 0x2e: /* FCMLT (zero) */
+        case 0x6c: /* FCMGE (zero) */
+        case 0x6d: /* FCMLE (zero) */
+            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
+            return;
+        case 0x1a: /* FCVTNS */
+        case 0x1b: /* FCVTMS */
+        case 0x1c: /* FCVTAS */
+        case 0x1d: /* SCVTF */
+        case 0x3a: /* FCVTPS */
+        case 0x3b: /* FCVTZS */
+        case 0x3d: /* FRECPE */
+        case 0x3f: /* FRECPX */
+        case 0x56: /* FCVTXN, FCVTXN2 */
+        case 0x5a: /* FCVTNU */
+        case 0x5b: /* FCVTMU */
+        case 0x5c: /* FCVTAU */
+        case 0x5d: /* UCVTF */
+        case 0x7a: /* FCVTPU */
+        case 0x7b: /* FCVTZU */
+        case 0x7d: /* FRSQRTE */
+            unsupported_encoding(s, insn);
+            return;
+        default:
+            unallocated_encoding(s);
+            return;
+        }
+        break;
     default:
         /* Other categories of encoding in this class:
-         *  + floating point (single and double)
          *  + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
          *  + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2:
          *    narrowing saturate ops: size 64/32/16 -> 32/16/8
@@ -7101,12 +7268,9 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
         unsupported_encoding(s, insn);
         return;
     case 0x1b: /* FMULX */
-    case 0x1c: /* FCMEQ */
     case 0x1f: /* FRECPS */
     case 0x3f: /* FRSQRTS */
-    case 0x5c: /* FCMGE */
     case 0x5d: /* FACGE */
-    case 0x7c: /* FCMGT */
     case 0x7d: /* FACGT */
     case 0x19: /* FMLA */
     case 0x39: /* FMLS */
@@ -7114,13 +7278,16 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
         return;
     case 0x18: /* FMAXNM */
     case 0x1a: /* FADD */
+    case 0x1c: /* FCMEQ */
     case 0x1e: /* FMAX */
     case 0x38: /* FMINNM */
     case 0x3a: /* FSUB */
     case 0x3e: /* FMIN */
     case 0x5b: /* FMUL */
+    case 0x5c: /* FCMGE */
     case 0x5f: /* FDIV */
     case 0x7a: /* FABD */
+    case 0x7c: /* FCMGT */
         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
         return;
     default:
@@ -7700,6 +7867,17 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
                 return;
             }
             break;
+        case 0x2c: /* FCMGT (zero) */
+        case 0x2d: /* FCMEQ (zero) */
+        case 0x2e: /* FCMLT (zero) */
+        case 0x6c: /* FCMGE (zero) */
+        case 0x6d: /* FCMLE (zero) */
+            if (size == 3 && !is_q) {
+                unallocated_encoding(s);
+                return;
+            }
+            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
+            return;
         case 0x16: /* FCVTN, FCVTN2 */
         case 0x17: /* FCVTL, FCVTL2 */
         case 0x18: /* FRINTN */
@@ -7708,9 +7886,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
         case 0x1b: /* FCVTMS */
         case 0x1c: /* FCVTAS */
         case 0x1d: /* SCVTF */
-        case 0x2c: /* FCMGT (zero) */
-        case 0x2d: /* FCMEQ (zero) */
-        case 0x2e: /* FCMLT (zero) */
         case 0x38: /* FRINTP */
         case 0x39: /* FRINTZ */
         case 0x3a: /* FCVTPS */
@@ -7724,8 +7899,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
         case 0x5b: /* FCVTMU */
         case 0x5c: /* FCVTAU */
         case 0x5d: /* UCVTF */
-        case 0x6c: /* FCMGE (zero) */
-        case 0x6d: /* FCMLE (zero) */
         case 0x79: /* FRINTI */
         case 0x7a: /* FCVTPU */
         case 0x7b: /* FCVTZU */

From bc242f9bb6324a50e7572c0997904b66b630f73a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Thu, 20 Feb 2014 10:35:50 +0000
Subject: [PATCH 07/30] target-arm: A64: Implement floating point pairwise
 insns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for the floating-point pairwise operations
FADDP, FMAXP, FMAXNMP, FMINP and FMINNMP. To do this we use the
code which was previously handling only integer pairwise operations,
and push the integer-specific decode and handling of unallocated
cases up one level in the call tree, so we can also call it from
the floating-point section of the decoder.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 124 +++++++++++++++++++++++++------------
 1 file changed, 86 insertions(+), 38 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index bcf32a12b4..91128f57b9 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7105,39 +7105,22 @@ static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
 }
 
-/* Pairwise op subgroup of C3.6.16. */
-static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
+/* Pairwise op subgroup of C3.6.16.
+ *
+ * This is called directly or via the handle_3same_float for float pairwise
+ * operations where the opcode and size are calculated differently.
+ */
+static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
+                                   int size, int rn, int rm, int rd)
 {
-    int is_q = extract32(insn, 30, 1);
-    int u = extract32(insn, 29, 1);
-    int size = extract32(insn, 22, 2);
-    int opcode = extract32(insn, 11, 5);
-    int rm = extract32(insn, 16, 5);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
+    TCGv_ptr fpst;
     int pass;
 
-    if (size == 3 && !is_q) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    switch (opcode) {
-    case 0x14: /* SMAXP, UMAXP */
-    case 0x15: /* SMINP, UMINP */
-        if (size == 3) {
-            unallocated_encoding(s);
-            return;
-        }
-        break;
-    case 0x17:
-        if (u) {
-            unallocated_encoding(s);
-            return;
-        }
-        break;
-    default:
-        g_assert_not_reached();
+    /* Floating point operations need fpst */
+    if (opcode >= 0x58) {
+        fpst = get_fpstatus_ptr();
+    } else {
+        TCGV_UNUSED_PTR(fpst);
     }
 
     /* These operations work on the concatenated rm:rn, with each pair of
@@ -7155,9 +7138,28 @@ static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
             tcg_res[pass] = tcg_temp_new_i64();
 
-            /* The only 64 bit pairwise integer op is ADDP */
-            assert(opcode == 0x17);
-            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
+            switch (opcode) {
+            case 0x17: /* ADDP */
+                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
+                break;
+            case 0x58: /* FMAXNMP */
+                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5a: /* FADDP */
+                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5e: /* FMAXP */
+                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x78: /* FMINNMP */
+                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x7e: /* FMINP */
+                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            default:
+                g_assert_not_reached();
+            }
 
             tcg_temp_free_i64(tcg_op1);
             tcg_temp_free_i64(tcg_op2);
@@ -7174,7 +7176,7 @@ static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
         for (pass = 0; pass < maxpass; pass++) {
             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
-            NeonGenTwoOpFn *genfn;
+            NeonGenTwoOpFn *genfn = NULL;
             int passreg = pass < (maxpass / 2) ? rn : rm;
             int passelt = (is_q && (pass & 1)) ? 2 : 0;
 
@@ -7213,11 +7215,30 @@ static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
                 genfn = fns[size][u];
                 break;
             }
+            /* The FP operations are all on single floats (32 bit) */
+            case 0x58: /* FMAXNMP */
+                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5a: /* FADDP */
+                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5e: /* FMAXP */
+                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x78: /* FMINNMP */
+                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x7e: /* FMINP */
+                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
             default:
                 g_assert_not_reached();
             }
 
-            genfn(tcg_res[pass], tcg_op1, tcg_op2);
+            /* FP ops called directly, otherwise call now */
+            if (genfn) {
+                genfn(tcg_res[pass], tcg_op1, tcg_op2);
+            }
 
             tcg_temp_free_i32(tcg_op1);
             tcg_temp_free_i32(tcg_op2);
@@ -7231,6 +7252,10 @@ static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
             clear_vec_high(s, rd);
         }
     }
+
+    if (!TCGV_IS_UNUSED_PTR(fpst)) {
+        tcg_temp_free_ptr(fpst);
+    }
 }
 
 /* Floating point op subgroup of C3.6.16. */
@@ -7264,8 +7289,12 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
     case 0x5e: /* FMAXP */
     case 0x78: /* FMINNMP */
     case 0x7e: /* FMINP */
-        /* pairwise ops */
-        unsupported_encoding(s, insn);
+        if (size && !is_q) {
+            unallocated_encoding(s);
+            return;
+        }
+        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
+                               rn, rm, rd);
         return;
     case 0x1b: /* FMULX */
     case 0x1f: /* FRECPS */
@@ -7615,9 +7644,28 @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
     case 0x17: /* ADDP */
     case 0x14: /* SMAXP, UMAXP */
     case 0x15: /* SMINP, UMINP */
+    {
         /* Pairwise operations */
-        disas_simd_3same_pair(s, insn);
+        int is_q = extract32(insn, 30, 1);
+        int u = extract32(insn, 29, 1);
+        int size = extract32(insn, 22, 2);
+        int rm = extract32(insn, 16, 5);
+        int rn = extract32(insn, 5, 5);
+        int rd = extract32(insn, 0, 5);
+        if (opcode == 0x17) {
+            if (u || (size == 3 && !is_q)) {
+                unallocated_encoding(s);
+                return;
+            }
+        } else {
+            if (size == 3) {
+                unallocated_encoding(s);
+                return;
+            }
+        }
+        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
         break;
+    }
     case 0x18 ... 0x31:
         /* floating point ops, sz[1] and U are part of opcode */
         disas_simd_3same_float(s, insn);

From 67d43538aee10b6cfe8f3606c69187a3e142a2ba Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:50 +0000
Subject: [PATCH 08/30] softfloat: Support halving the result of muladd
 operation

The ARMv8 instruction set includes a fused floating point
reciprocal square root step instruction which demands an
"(x * y + z) / 2" fused operation. Support this by adding
a flag to the softfloat muladd operations which requests
that the result is halved before rounding.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 fpu/softfloat.c         | 38 ++++++++++++++++++++++++++++++++++++++
 include/fpu/softfloat.h |  3 +++
 2 files changed, 41 insertions(+)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index e0ea599769..fc0b179df4 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2372,6 +2372,17 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
             }
         }
         /* Zero plus something non-zero : just return the something */
+        if (flags & float_muladd_halve_result) {
+            if (cExp == 0) {
+                normalizeFloat32Subnormal(cSig, &cExp, &cSig);
+            }
+            /* Subtract one to halve, and one again because roundAndPackFloat32
+             * wants one less than the true exponent.
+             */
+            cExp -= 2;
+            cSig = (cSig | 0x00800000) << 7;
+            return roundAndPackFloat32(cSign ^ signflip, cExp, cSig STATUS_VAR);
+        }
         return packFloat32(cSign ^ signflip, cExp, cSig);
     }
 
@@ -2408,6 +2419,9 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
             /* Throw out the special case of c being an exact zero now */
             shift64RightJamming(pSig64, 32, &pSig64);
             pSig = pSig64;
+            if (flags & float_muladd_halve_result) {
+                pExp--;
+            }
             return roundAndPackFloat32(zSign, pExp - 1,
                                        pSig STATUS_VAR);
         }
@@ -2472,6 +2486,10 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
         zSig64 <<= shiftcount;
         zExp -= shiftcount;
     }
+    if (flags & float_muladd_halve_result) {
+        zExp--;
+    }
+
     shift64RightJamming(zSig64, 32, &zSig64);
     return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
 }
@@ -4088,6 +4106,17 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
             }
         }
         /* Zero plus something non-zero : just return the something */
+        if (flags & float_muladd_halve_result) {
+            if (cExp == 0) {
+                normalizeFloat64Subnormal(cSig, &cExp, &cSig);
+            }
+            /* Subtract one to halve, and one again because roundAndPackFloat64
+             * wants one less than the true exponent.
+             */
+            cExp -= 2;
+            cSig = (cSig | 0x0010000000000000ULL) << 10;
+            return roundAndPackFloat64(cSign ^ signflip, cExp, cSig STATUS_VAR);
+        }
         return packFloat64(cSign ^ signflip, cExp, cSig);
     }
 
@@ -4123,6 +4152,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
         if (!cSig) {
             /* Throw out the special case of c being an exact zero now */
             shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
+            if (flags & float_muladd_halve_result) {
+                pExp--;
+            }
             return roundAndPackFloat64(zSign, pExp - 1,
                                        pSig1 STATUS_VAR);
         }
@@ -4159,6 +4191,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
             zExp--;
         }
         shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
+        if (flags & float_muladd_halve_result) {
+            zExp--;
+        }
         return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
     } else {
         /* Subtraction */
@@ -4209,6 +4244,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
                 zExp -= (shiftcount + 64);
             }
         }
+        if (flags & float_muladd_halve_result) {
+            zExp--;
+        }
         return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
     }
 }
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 806ae13780..4b4df88527 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -249,11 +249,14 @@ void float_raise( int8 flags STATUS_PARAM);
 | Using these differs from negating an input or output before calling
 | the muladd function in that this means that a NaN doesn't have its
 | sign bit inverted before it is propagated.
+| We also support halving the result before rounding, as a special
+| case to support the ARM fused-sqrt-step instruction FRSQRTS.
 *----------------------------------------------------------------------------*/
 enum {
     float_muladd_negate_c = 1,
     float_muladd_negate_product = 2,
     float_muladd_negate_result = 4,
+    float_muladd_halve_result = 8,
 };
 
 /*----------------------------------------------------------------------------

From 057d5f62f822c4789ca0af9c9e9b42322679c793 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:50 +0000
Subject: [PATCH 09/30] target-arm: A64: Implement remaining 3-same
 instructions

Implement the remaining instructions in the SIMD 3-reg-same
and scalar-3-reg-same groups: FMULX, FRECPS, FRSQRTS, FACGE,
FACGT, FMLA and FMLS.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/helper-a64.c    | 60 ++++++++++++++++++++++++++++++++++++++
 target-arm/helper-a64.h    |  4 +++
 target-arm/helper.h        |  2 ++
 target-arm/neon_helper.c   | 16 ++++++++++
 target-arm/translate-a64.c | 52 ++++++++++++++++++++++++++++++---
 5 files changed, 130 insertions(+), 4 deletions(-)

diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index b4cab51e62..c2ce33ee88 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -198,3 +198,63 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
     float_status *fpst = fpstp;
     return -float64_lt(b, a, fpst);
 }
+
+/* Reciprocal step and sqrt step. Note that unlike the A32/T32
+ * versions, these do a fully fused multiply-add or
+ * multiply-add-and-halve.
+ */
+#define float32_two make_float32(0x40000000)
+#define float32_three make_float32(0x40400000)
+#define float32_one_point_five make_float32(0x3fc00000)
+
+#define float64_two make_float64(0x4000000000000000ULL)
+#define float64_three make_float64(0x4008000000000000ULL)
+#define float64_one_point_five make_float64(0x3FF8000000000000ULL)
+
+float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+
+    a = float32_chs(a);
+    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
+        (float32_is_infinity(b) && float32_is_zero(a))) {
+        return float32_two;
+    }
+    return float32_muladd(a, b, float32_two, 0, fpst);
+}
+
+float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+
+    a = float64_chs(a);
+    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
+        (float64_is_infinity(b) && float64_is_zero(a))) {
+        return float64_two;
+    }
+    return float64_muladd(a, b, float64_two, 0, fpst);
+}
+
+float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+
+    a = float32_chs(a);
+    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
+        (float32_is_infinity(b) && float32_is_zero(a))) {
+        return float32_one_point_five;
+    }
+    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
+}
+
+float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+
+    a = float64_chs(a);
+    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
+        (float64_is_infinity(b) && float64_is_zero(a))) {
+        return float64_one_point_five;
+    }
+    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
+}
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index bf20466f50..ab9933cab0 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -32,3 +32,7 @@ DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
 DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
 DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
 DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
+DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
+DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
+DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
+DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 951e6ada07..7c60121b1f 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -382,6 +382,8 @@ DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr)
+DEF_HELPER_3(neon_acge_f64, i64, i64, i64, ptr)
+DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, ptr)
 
 /* iwmmxt_helper.c */
 DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index b4c86904f4..13752baf63 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -1823,6 +1823,22 @@ uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp)
     return -float32_lt(f1, f0, fpst);
 }
 
+uint64_t HELPER(neon_acge_f64)(uint64_t a, uint64_t b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    float64 f0 = float64_abs(make_float64(a));
+    float64 f1 = float64_abs(make_float64(b));
+    return -float64_le(f1, f0, fpst);
+}
+
+uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    float64 f0 = float64_abs(make_float64(a));
+    float64 f1 = float64_abs(make_float64(b));
+    return -float64_lt(f1, f0, fpst);
+}
+
 #define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))
 
 void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 91128f57b9..c96ba68520 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -6045,18 +6045,33 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             read_vec_element(s, tcg_op2, rm, pass, MO_64);
 
             switch (fpopcode) {
+            case 0x39: /* FMLS */
+                /* As usual for ARM, separate negation for fused multiply-add */
+                gen_helper_vfp_negd(tcg_op1, tcg_op1);
+                /* fall through */
+            case 0x19: /* FMLA */
+                read_vec_element(s, tcg_res, rd, pass, MO_64);
+                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
+                                       tcg_res, fpst);
+                break;
             case 0x18: /* FMAXNM */
                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
             case 0x1a: /* FADD */
                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x1b: /* FMULX */
+                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x1c: /* FCMEQ */
                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
             case 0x1e: /* FMAX */
                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x1f: /* FRECPS */
+                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x38: /* FMINNM */
                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
@@ -6066,12 +6081,18 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             case 0x3e: /* FMIN */
                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x3f: /* FRSQRTS */
+                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x5b: /* FMUL */
                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
             case 0x5c: /* FCMGE */
                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x5d: /* FACGE */
+                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x5f: /* FDIV */
                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
@@ -6082,6 +6103,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             case 0x7c: /* FCMGT */
                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x7d: /* FACGT */
+                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             default:
                 g_assert_not_reached();
             }
@@ -6101,15 +6125,30 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
 
             switch (fpopcode) {
+            case 0x39: /* FMLS */
+                /* As usual for ARM, separate negation for fused multiply-add */
+                gen_helper_vfp_negs(tcg_op1, tcg_op1);
+                /* fall through */
+            case 0x19: /* FMLA */
+                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
+                                       tcg_res, fpst);
+                break;
             case 0x1a: /* FADD */
                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x1b: /* FMULX */
+                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x1c: /* FCMEQ */
                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
             case 0x1e: /* FMAX */
                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x1f: /* FRECPS */
+                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x18: /* FMAXNM */
                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
@@ -6122,12 +6161,18 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             case 0x3e: /* FMIN */
                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x3f: /* FRSQRTS */
+                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x5b: /* FMUL */
                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
             case 0x5c: /* FCMGE */
                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x5d: /* FACGE */
+                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             case 0x5f: /* FDIV */
                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
@@ -6138,6 +6183,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
             case 0x7c: /* FCMGT */
                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                 break;
+            case 0x7d: /* FACGT */
+                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
             default:
                 g_assert_not_reached();
             }
@@ -6192,8 +6240,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
         case 0x3f: /* FRSQRTS */
         case 0x5d: /* FACGE */
         case 0x7d: /* FACGT */
-            unsupported_encoding(s, insn);
-            return;
         case 0x1c: /* FCMEQ */
         case 0x5c: /* FCMGE */
         case 0x7c: /* FCMGT */
@@ -7303,8 +7349,6 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
     case 0x7d: /* FACGT */
     case 0x19: /* FMLA */
     case 0x39: /* FMLS */
-        unsupported_encoding(s, insn);
-        return;
     case 0x18: /* FMAXNM */
     case 0x1a: /* FADD */
     case 0x1c: /* FCMEQ */

From 83e9a4aec9ffcbcb86ca9d39d2c5aae8176411fb Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:50 +0000
Subject: [PATCH 10/30] target-arm/kvm-consts.h: Define QEMU constants for
 known KVM CPUs

Extend the set of CPUs for which we provide a QEMU_KVM_ARM_TARGET_*
constant to include all the ones currently supported by the kernel
headers we are using.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/kvm-consts.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/target-arm/kvm-consts.h b/target-arm/kvm-consts.h
index 0e7f889cba..6009a33f10 100644
--- a/target-arm/kvm-consts.h
+++ b/target-arm/kvm-consts.h
@@ -50,15 +50,29 @@ MISMATCH_CHECK(PSCI_FN_CPU_OFF, KVM_PSCI_FN_CPU_OFF)
 MISMATCH_CHECK(PSCI_FN_CPU_ON, KVM_PSCI_FN_CPU_ON)
 MISMATCH_CHECK(PSCI_FN_MIGRATE, KVM_PSCI_FN_MIGRATE)
 
+/* Note that KVM uses overlapping values for AArch32 and AArch64
+ * target CPU numbers. AArch32 targets:
+ */
 #define QEMU_KVM_ARM_TARGET_CORTEX_A15 0
+#define QEMU_KVM_ARM_TARGET_CORTEX_A7 1
+
+/* AArch64 targets: */
+#define QEMU_KVM_ARM_TARGET_AEM_V8 0
+#define QEMU_KVM_ARM_TARGET_FOUNDATION_V8 1
+#define QEMU_KVM_ARM_TARGET_CORTEX_A57 2
 
 /* There's no kernel define for this: sentinel value which
  * matches no KVM target value for either 64 or 32 bit
  */
 #define QEMU_KVM_ARM_TARGET_NONE UINT_MAX
 
-#ifndef TARGET_AARCH64
+#ifdef TARGET_AARCH64
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_AEM_V8, KVM_ARM_TARGET_AEM_V8)
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8)
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57)
+#else
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A15, KVM_ARM_TARGET_CORTEX_A15)
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A7, KVM_ARM_TARGET_CORTEX_A7)
 #endif
 
 #define CP_REG_ARM64                   0x6000000000000000ULL

From 76e3e1bcaefe0da394f328854cb72f9449f23732 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:51 +0000
Subject: [PATCH 11/30] target-arm: Define names for SCTLR bits

The SCTLR is full of bits for enabling or disabling various things, and so
there are many places in the code which check if certain bits are set.
Define some named constants for the SCTLR bits so these checks are easier
to read.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/cpu.c    |  2 +-
 target-arm/cpu.h    | 52 +++++++++++++++++++++++++++++++++++++++++++++
 target-arm/helper.c | 16 +++++++-------
 3 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 45ad7f0260..82943428bc 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -128,7 +128,7 @@ static void arm_cpu_reset(CPUState *s)
         }
     }
 
-    if (env->cp15.c1_sys & (1 << 13)) {
+    if (env->cp15.c1_sys & SCTLR_V) {
             env->regs[15] = 0xFFFF0000;
     }
 
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 383c58221e..52894fc33e 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -337,6 +337,58 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
                               int mmu_idx);
 #define cpu_handle_mmu_fault cpu_arm_handle_mmu_fault
 
+/* SCTLR bit meanings. Several bits have been reused in newer
+ * versions of the architecture; in that case we define constants
+ * for both old and new bit meanings. Code which tests against those
+ * bits should probably check or otherwise arrange that the CPU
+ * is the architectural version it expects.
+ */
+#define SCTLR_M       (1U << 0)
+#define SCTLR_A       (1U << 1)
+#define SCTLR_C       (1U << 2)
+#define SCTLR_W       (1U << 3) /* up to v6; RAO in v7 */
+#define SCTLR_SA      (1U << 3)
+#define SCTLR_P       (1U << 4) /* up to v5; RAO in v6 and v7 */
+#define SCTLR_SA0     (1U << 4) /* v8 onward, AArch64 only */
+#define SCTLR_D       (1U << 5) /* up to v5; RAO in v6 */
+#define SCTLR_CP15BEN (1U << 5) /* v7 onward */
+#define SCTLR_L       (1U << 6) /* up to v5; RAO in v6 and v7; RAZ in v8 */
+#define SCTLR_B       (1U << 7) /* up to v6; RAZ in v7 */
+#define SCTLR_ITD     (1U << 7) /* v8 onward */
+#define SCTLR_S       (1U << 8) /* up to v6; RAZ in v7 */
+#define SCTLR_SED     (1U << 8) /* v8 onward */
+#define SCTLR_R       (1U << 9) /* up to v6; RAZ in v7 */
+#define SCTLR_UMA     (1U << 9) /* v8 onward, AArch64 only */
+#define SCTLR_F       (1U << 10) /* up to v6 */
+#define SCTLR_SW      (1U << 10) /* v7 onward */
+#define SCTLR_Z       (1U << 11)
+#define SCTLR_I       (1U << 12)
+#define SCTLR_V       (1U << 13)
+#define SCTLR_RR      (1U << 14) /* up to v7 */
+#define SCTLR_DZE     (1U << 14) /* v8 onward, AArch64 only */
+#define SCTLR_L4      (1U << 15) /* up to v6; RAZ in v7 */
+#define SCTLR_UCT     (1U << 15) /* v8 onward, AArch64 only */
+#define SCTLR_DT      (1U << 16) /* up to ??, RAO in v6 and v7 */
+#define SCTLR_nTWI    (1U << 16) /* v8 onward */
+#define SCTLR_HA      (1U << 17)
+#define SCTLR_IT      (1U << 18) /* up to ??, RAO in v6 and v7 */
+#define SCTLR_nTWE    (1U << 18) /* v8 onward */
+#define SCTLR_WXN     (1U << 19)
+#define SCTLR_ST      (1U << 20) /* up to ??, RAZ in v6 */
+#define SCTLR_UWXN    (1U << 20) /* v7 onward */
+#define SCTLR_FI      (1U << 21)
+#define SCTLR_U       (1U << 22)
+#define SCTLR_XP      (1U << 23) /* up to v6; v7 onward RAO */
+#define SCTLR_VE      (1U << 24) /* up to v7 */
+#define SCTLR_E0E     (1U << 24) /* v8 onward, AArch64 only */
+#define SCTLR_EE      (1U << 25)
+#define SCTLR_L2      (1U << 26) /* up to v6, RAZ in v7 */
+#define SCTLR_UCI     (1U << 26) /* v8 onward, AArch64 only */
+#define SCTLR_NMFI    (1U << 27)
+#define SCTLR_TRE     (1U << 28)
+#define SCTLR_AFE     (1U << 29)
+#define SCTLR_TE      (1U << 30)
+
 #define CPSR_M (0x1fU)
 #define CPSR_T (1U << 5)
 #define CPSR_F (1U << 6)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 5ae08c9ad1..d9e94f200d 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2718,7 +2718,7 @@ void arm_cpu_do_interrupt(CPUState *cs)
         return; /* Never happens.  Keep compiler happy.  */
     }
     /* High vectors.  */
-    if (env->cp15.c1_sys & (1 << 13)) {
+    if (env->cp15.c1_sys & SCTLR_V) {
         /* when enabled, base address cannot be remapped.  */
         addr += 0xffff0000;
     } else {
@@ -2741,7 +2741,7 @@ void arm_cpu_do_interrupt(CPUState *cs)
     /* this is a lie, as the was no c1_sys on V4T/V5, but who cares
      * and we should just guard the thumb mode on V4 */
     if (arm_feature(env, ARM_FEATURE_V4T)) {
-        env->thumb = (env->cp15.c1_sys & (1 << 30)) != 0;
+        env->thumb = (env->cp15.c1_sys & SCTLR_TE) != 0;
     }
     env->regs[14] = env->regs[15] + offset;
     env->regs[15] = addr;
@@ -2769,10 +2769,10 @@ static inline int check_ap(CPUARMState *env, int ap, int domain_prot,
   case 0:
       if (access_type == 1)
           return 0;
-      switch ((env->cp15.c1_sys >> 8) & 3) {
-      case 1:
+      switch (env->cp15.c1_sys & (SCTLR_S | SCTLR_R)) {
+      case SCTLR_S:
           return is_user ? 0 : PAGE_READ;
-      case 2:
+      case SCTLR_R:
           return PAGE_READ;
       default:
           return 0;
@@ -3003,7 +3003,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
             goto do_fault;
 
         /* The simplified model uses AP[0] as an access control bit.  */
-        if ((env->cp15.c1_sys & (1 << 29)) && (ap & 1) == 0) {
+        if ((env->cp15.c1_sys & SCTLR_AFE) && (ap & 1) == 0) {
             /* Access flag fault.  */
             code = (code == 15) ? 6 : 3;
             goto do_fault;
@@ -3295,7 +3295,7 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address,
     if (address < 0x02000000)
         address += env->cp15.c13_fcse;
 
-    if ((env->cp15.c1_sys & 1) == 0) {
+    if ((env->cp15.c1_sys & SCTLR_M) == 0) {
         /* MMU/MPU disabled.  */
         *phys_ptr = address;
         *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
@@ -3308,7 +3308,7 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address,
     } else if (extended_addresses_enabled(env)) {
         return get_phys_addr_lpae(env, address, access_type, is_user, phys_ptr,
                                   prot, page_size);
-    } else if (env->cp15.c1_sys & (1 << 23)) {
+    } else if (env->cp15.c1_sys & SCTLR_XP) {
         return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr,
                                 prot, page_size);
     } else {

From 99f678a679857d83b9709127b0f047551d7e3c45 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:51 +0000
Subject: [PATCH 12/30] target-arm: Restrict check_ap() use of S and R bits to
 v6 and earlier

The SCTLR bits S and R (8 and 9) only exist in ARMv6 and earlier.
In ARMv7 these bits RAZ, and in ARMv8 they are reassigned. Guard
the use of them in check_ap() so that we don't get incorrect results
for ARMv8 CPUs.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
---
 target-arm/helper.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index d9e94f200d..13707a37a9 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2767,6 +2767,9 @@ static inline int check_ap(CPUARMState *env, int ap, int domain_prot,
 
   switch (ap) {
   case 0:
+      if (arm_feature(env, ARM_FEATURE_V7)) {
+          return 0;
+      }
       if (access_type == 1)
           return 0;
       switch (env->cp15.c1_sys & (SCTLR_S | SCTLR_R)) {

From 1456364ff0900893f81f8d06fb1b42e5c2ad8a23 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:51 +0000
Subject: [PATCH 13/30] target-arm: Remove unused ARMCPUState sr substruct

Remove the 'struct sr' from ARMCPUState -- it isn't actually used and is
a hangover from the original separate system register implementation used
by the SuSE linux-user-mode-only AArch64 target.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
---
 target-arm/cpu.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 52894fc33e..ab57f554a0 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -217,11 +217,6 @@ typedef struct CPUARMState {
         uint32_t c15_power_control; /* power control */
     } cp15;
 
-    /* System registers (AArch64) */
-    struct {
-        uint64_t tpidr_el0;
-    } sr;
-
     struct {
         uint32_t other_sp;
         uint32_t vecbase;

From 626187d86b037c89367f2f94785717b75e0e4440 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:52 +0000
Subject: [PATCH 14/30] target-arm: Log bad system register accesses with
 LOG_UNIMP

Log guest attempts to access unimplemented system registers via
the LOG_UNIMP reporting mechanism (for both the 32 bit and 64 bit
instruction sets). This is particularly useful for debugging
problems where the guest is trying to use a system register that
QEMU doesn't implement.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
---
 target-arm/translate-a64.c |  7 ++++++-
 target-arm/translate.c     | 13 +++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index c96ba68520..f6500e5181 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1177,7 +1177,12 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                                                crn, crm, op0, op1, op2));
 
     if (!ri) {
-        /* Unknown register */
+        /* Unknown register; this might be a guest error or a QEMU
+         * unimplemented feature.
+         */
+        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
+                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
+                      isread ? "read" : "write", op0, op1, crn, crm, op2);
         unallocated_encoding(s);
         return;
     }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 782aab8b58..6d822c60b5 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -6962,6 +6962,19 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
         return 0;
     }
 
+    /* Unknown register; this might be a guest error or a QEMU
+     * unimplemented feature.
+     */
+    if (is64) {
+        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
+                      "64 bit system register cp:%d opc1: %d crm:%d\n",
+                      isread ? "read" : "write", cpnum, opc1, crm);
+    } else {
+        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
+                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d\n",
+                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2);
+    }
+
     return 1;
 }
 

From e508a92b621c7160122e99d3754e568f2b8e255e Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:52 +0000
Subject: [PATCH 15/30] target-arm: Stop underdecoding ARM946 PRBS registers

The ARM946 has 8 PRBS (protection region base and size) registers.
Currently we implement these with a CP_ANY reginfo; however this
underdecodes (since there are 16 possible values of CRm but only
8 registers) and we catch the invalid values in the read and
write functions. However this causes issues with migration since
we only migrate the first of a wildcard register set, so we only
migrate c6_region[0]. It also makes it awkward to pull reginfo
access checks out into their own function.

Avoid all these problems by just defining separate reginfo structs
for each of the 8 registers; this also lets us avoid having any
read or write functions and will result in more efficient direct
field accesses from generated code.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c | 47 +++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 13707a37a9..135a357eb9 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1162,26 +1162,6 @@ static int pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri,
     return 0;
 }
 
-static int arm946_prbs_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                            uint64_t *value)
-{
-    if (ri->crm >= 8) {
-        return EXCP_UDEF;
-    }
-    *value = env->cp15.c6_region[ri->crm];
-    return 0;
-}
-
-static int arm946_prbs_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                             uint64_t value)
-{
-    if (ri->crm >= 8) {
-        return EXCP_UDEF;
-    }
-    env->cp15.c6_region[ri->crm] = value;
-    return 0;
-}
-
 static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
     { .name = "DATA_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
@@ -1204,9 +1184,30 @@ static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c2_insn), .resetvalue = 0, },
     /* Protection region base and size registers */
-    { .name = "946_PRBS", .cp = 15, .crn = 6, .crm = CP_ANY, .opc1 = 0,
-      .opc2 = CP_ANY, .access = PL1_RW,
-      .readfn = arm946_prbs_read, .writefn = arm946_prbs_write, },
+    { .name = "946_PRBS0", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0,
+      .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c6_region[0]) },
+    { .name = "946_PRBS1", .cp = 15, .crn = 6, .crm = 1, .opc1 = 0,
+      .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c6_region[1]) },
+    { .name = "946_PRBS2", .cp = 15, .crn = 6, .crm = 2, .opc1 = 0,
+      .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c6_region[2]) },
+    { .name = "946_PRBS3", .cp = 15, .crn = 6, .crm = 3, .opc1 = 0,
+      .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c6_region[3]) },
+    { .name = "946_PRBS4", .cp = 15, .crn = 6, .crm = 4, .opc1 = 0,
+      .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c6_region[4]) },
+    { .name = "946_PRBS5", .cp = 15, .crn = 6, .crm = 5, .opc1 = 0,
+      .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c6_region[5]) },
+    { .name = "946_PRBS6", .cp = 15, .crn = 6, .crm = 6, .opc1 = 0,
+      .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c6_region[6]) },
+    { .name = "946_PRBS7", .cp = 15, .crn = 6, .crm = 7, .opc1 = 0,
+      .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c6_region[7]) },
     REGINFO_SENTINEL
 };
 

From f59df3f2354982ee0381b87d1ce561f1eb0ed505 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:52 +0000
Subject: [PATCH 16/30] target-arm: Split cpreg access checks out from
 read/write functions

Several of the system registers handled via the ARMCPRegInfo
mechanism have access trap control bits controlling whether the
registers are accessible to lower privilege levels. Replace
the existing mechanism (allowing the read and write functions
to return EXCP_UDEF if access is denied) with a dedicated
"check access rights" function pointer in the ARMCPRegInfo.
This will allow us to simplify some of the register definitions,
which no longer need read/write functions purely to handle
the access checks.

We take the opportunity to define the return value from the
access checking function in a way that allows us to set the
correct exception syndrome information for exceptions taken
to AArch64 (which may need to distinguish access failures due
to a configurable trap or enable from other kinds of access
failure).

This commit defines the new mechanism but does not move any
of the registers across to use it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
---
 target-arm/cpu.h           | 29 +++++++++++++++++++++++++----
 target-arm/helper.h        |  1 +
 target-arm/op_helper.c     | 18 ++++++++++++++++++
 target-arm/translate-a64.c | 11 +++++++++++
 target-arm/translate.c     | 11 +++++++++++
 5 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index ab57f554a0..cc3d0ac9f2 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -811,14 +811,29 @@ static inline int arm_current_pl(CPUARMState *env)
 
 typedef struct ARMCPRegInfo ARMCPRegInfo;
 
-/* Access functions for coprocessor registers. These should return
- * 0 on success, or one of the EXCP_* constants if access should cause
- * an exception (in which case *value is not written).
- */
+typedef enum CPAccessResult {
+    /* Access is permitted */
+    CP_ACCESS_OK = 0,
+    /* Access fails due to a configurable trap or enable which would
+     * result in a categorized exception syndrome giving information about
+     * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6,
+     * 0xc or 0x18).
+     */
+    CP_ACCESS_TRAP = 1,
+    /* Access fails and results in an exception syndrome 0x0 ("uncategorized").
+     * Note that this is not a catch-all case -- the set of cases which may
+     * result in this failure is specifically defined by the architecture.
+     */
+    CP_ACCESS_TRAP_UNCATEGORIZED = 2,
+} CPAccessResult;
+
+/* Access functions for coprocessor registers. These should always succeed. */
 typedef int CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque,
                      uint64_t *value);
 typedef int CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque,
                       uint64_t value);
+/* Access permission check functions for coprocessor registers. */
+typedef CPAccessResult CPAccessFn(CPUARMState *env, const ARMCPRegInfo *opaque);
 /* Hook function for register reset */
 typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque);
 
@@ -872,6 +887,12 @@ struct ARMCPRegInfo {
      *  2. both readfn and writefn are specified
      */
     ptrdiff_t fieldoffset; /* offsetof(CPUARMState, field) */
+    /* Function for making any access checks for this register in addition to
+     * those specified by the 'access' permissions bits. If NULL, no extra
+     * checks required. The access check is performed at runtime, not at
+     * translate time.
+     */
+    CPAccessFn *accessfn;
     /* Function for handling reads of this register. If NULL, then reads
      * will be done by loading from the offset into CPUARMState specified
      * by fieldoffset.
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 7c60121b1f..19bd620532 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -57,6 +57,7 @@ DEF_HELPER_1(cpsr_read, i32, env)
 DEF_HELPER_3(v7m_msr, void, env, i32, i32)
 DEF_HELPER_2(v7m_mrs, i32, env, i32)
 
+DEF_HELPER_2(access_check_cp_reg, void, env, ptr)
 DEF_HELPER_3(set_cp_reg, void, env, ptr, i32)
 DEF_HELPER_2(get_cp_reg, i32, env, ptr)
 DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index a918e5b27a..34c5e7f9fb 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -273,6 +273,24 @@ void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val)
     }
 }
 
+void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip)
+{
+    const ARMCPRegInfo *ri = rip;
+    switch (ri->accessfn(env, ri)) {
+    case CP_ACCESS_OK:
+        return;
+    case CP_ACCESS_TRAP:
+    case CP_ACCESS_TRAP_UNCATEGORIZED:
+        /* These cases will eventually need to generate different
+         * syndrome information.
+         */
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    raise_exception(env, EXCP_UDEF);
+}
+
 void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value)
 {
     const ARMCPRegInfo *ri = rip;
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index f6500e5181..e70d14fe3e 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1193,6 +1193,17 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
         return;
     }
 
+    if (ri->accessfn) {
+        /* Emit code to perform further access permissions checks at
+         * runtime; this may result in an exception.
+         */
+        TCGv_ptr tmpptr;
+        gen_a64_set_pc_im(s->pc - 4);
+        tmpptr = tcg_const_ptr(ri);
+        gen_helper_access_check_cp_reg(cpu_env, tmpptr);
+        tcg_temp_free_ptr(tmpptr);
+    }
+
     /* Handle special cases first */
     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
     case ARM_CP_NOP:
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 6d822c60b5..0805053aaf 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -6837,6 +6837,17 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
             return 1;
         }
 
+        if (ri->accessfn) {
+            /* Emit code to perform further access permissions checks at
+             * runtime; this may result in an exception.
+             */
+            TCGv_ptr tmpptr;
+            gen_set_pc_im(s, s->pc);
+            tmpptr = tcg_const_ptr(ri);
+            gen_helper_access_check_cp_reg(cpu_env, tmpptr);
+            tcg_temp_free_ptr(tmpptr);
+        }
+
         /* Handle special cases first */
         switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
         case ARM_CP_NOP:

From fcd252062a018380e9ce14ed4d71f6d557018e2b Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:52 +0000
Subject: [PATCH 17/30] target-arm: Convert performance monitor reginfo to
 accessfn

Convert the performance monitor reginfo definitions to use
an accessfn rather than returning EXCP_UDEF from read and
write functions. This also allows us to fix a couple of XXX
cases where we weren't imposing the access restrictions on
RAZ/WI or constant registers.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c | 70 ++++++++++++++++++---------------------------
 1 file changed, 28 insertions(+), 42 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 135a357eb9..49894a865f 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -485,26 +485,20 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-
-static int pmreg_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                      uint64_t *value)
+static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    /* Generic performance monitor register read function for where
-     * user access may be allowed by PMUSERENR.
+    /* Perfomance monitor registers user accessibility is controlled
+     * by PMUSERENR.
      */
     if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
-        return EXCP_UDEF;
+        return CP_ACCESS_TRAP;
     }
-    *value = CPREG_FIELD32(env, ri);
-    return 0;
+    return CP_ACCESS_OK;
 }
 
 static int pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                       uint64_t value)
 {
-    if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
-        return EXCP_UDEF;
-    }
     /* only the DP, X, D and E bits are writable */
     env->cp15.c9_pmcr &= ~0x39;
     env->cp15.c9_pmcr |= (value & 0x39);
@@ -514,9 +508,6 @@ static int pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static int pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
-    if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
-        return EXCP_UDEF;
-    }
     value &= (1 << 31);
     env->cp15.c9_pmcnten |= value;
     return 0;
@@ -525,9 +516,6 @@ static int pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static int pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
-    if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
-        return EXCP_UDEF;
-    }
     value &= (1 << 31);
     env->cp15.c9_pmcnten &= ~value;
     return 0;
@@ -536,9 +524,6 @@ static int pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static int pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                         uint64_t value)
 {
-    if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
-        return EXCP_UDEF;
-    }
     env->cp15.c9_pmovsr &= ~value;
     return 0;
 }
@@ -546,9 +531,6 @@ static int pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static int pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
-    if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
-        return EXCP_UDEF;
-    }
     env->cp15.c9_pmxevtyper = value & 0xff;
     return 0;
 }
@@ -624,37 +606,41 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
     { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1,
       .access = PL0_RW, .resetvalue = 0,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
-      .readfn = pmreg_read, .writefn = pmcntenset_write,
-      .raw_readfn = raw_read, .raw_writefn = raw_write },
+      .writefn = pmcntenset_write,
+      .accessfn = pmreg_access,
+      .raw_writefn = raw_write },
     { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2,
       .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
-      .readfn = pmreg_read, .writefn = pmcntenclr_write,
+      .accessfn = pmreg_access,
+      .writefn = pmcntenclr_write,
       .type = ARM_CP_NO_MIGRATE },
     { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3,
       .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
-      .readfn = pmreg_read, .writefn = pmovsr_write,
-      .raw_readfn = raw_read, .raw_writefn = raw_write },
-    /* Unimplemented so WI. Strictly speaking write accesses in PL0 should
-     * respect PMUSERENR.
-     */
+      .accessfn = pmreg_access,
+      .writefn = pmovsr_write,
+      .raw_writefn = raw_write },
+    /* Unimplemented so WI. */
     { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4,
-      .access = PL0_W, .type = ARM_CP_NOP },
+      .access = PL0_W, .accessfn = pmreg_access, .type = ARM_CP_NOP },
     /* Since we don't implement any events, writing to PMSELR is UNPREDICTABLE.
-     * We choose to RAZ/WI. XXX should respect PMUSERENR.
+     * We choose to RAZ/WI.
      */
     { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
-      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
-    /* Unimplemented, RAZ/WI. XXX PMUSERENR */
+      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = pmreg_access },
+    /* Unimplemented, RAZ/WI. */
     { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0,
-      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = pmreg_access },
     { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1,
       .access = PL0_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
-      .readfn = pmreg_read, .writefn = pmxevtyper_write,
-      .raw_readfn = raw_read, .raw_writefn = raw_write },
-    /* Unimplemented, RAZ/WI. XXX PMUSERENR */
+      .accessfn = pmreg_access, .writefn = pmxevtyper_write,
+      .raw_writefn = raw_write },
+    /* Unimplemented, RAZ/WI. */
     { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2,
-      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = pmreg_access },
     { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0,
       .access = PL0_R | PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr),
@@ -1708,8 +1694,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
             .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
             .access = PL0_RW, .resetvalue = cpu->midr & 0xff000000,
             .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
-            .readfn = pmreg_read, .writefn = pmcr_write,
-            .raw_readfn = raw_read, .raw_writefn = raw_write,
+            .accessfn = pmreg_access, .writefn = pmcr_write,
+            .raw_writefn = raw_write,
         };
         ARMCPRegInfo clidr = {
             .name = "CLIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,

From 00108f2d4d86f41694bafe173271528f7e69b0b7 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:53 +0000
Subject: [PATCH 18/30] target-arm: Convert generic timer reginfo to accessfn

Convert the reginfo structs for the generic timer registers
to use access functions rather than returning EXCP_UDEF from
their read handlers. In some cases this allows us to remove
a read handler completely.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
---
 target-arm/helper.c | 122 ++++++++++++++++++++++++--------------------
 1 file changed, 66 insertions(+), 56 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 49894a865f..aec052b373 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -743,6 +743,59 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = {
 
 #ifndef CONFIG_USER_ONLY
 
+static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero */
+    if (arm_current_pl(env) == 0 && !extract32(env->cp15.c14_cntkctl, 0, 2)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx)
+{
+    /* CNT[PV]CT: not visible from PL0 if ELO[PV]CTEN is zero */
+    if (arm_current_pl(env) == 0 &&
+        !extract32(env->cp15.c14_cntkctl, timeridx, 1)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx)
+{
+    /* CNT[PV]_CVAL, CNT[PV]_CTL, CNT[PV]_TVAL: not visible from PL0 if
+     * EL0[PV]TEN is zero.
+     */
+    if (arm_current_pl(env) == 0 &&
+        !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static CPAccessResult gt_pct_access(CPUARMState *env,
+                                         const ARMCPRegInfo *ri)
+{
+    return gt_counter_access(env, GTIMER_PHYS);
+}
+
+static CPAccessResult gt_vct_access(CPUARMState *env,
+                                         const ARMCPRegInfo *ri)
+{
+    return gt_counter_access(env, GTIMER_VIRT);
+}
+
+static CPAccessResult gt_ptimer_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    return gt_timer_access(env, GTIMER_PHYS);
+}
+
+static CPAccessResult gt_vtimer_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    return gt_timer_access(env, GTIMER_VIRT);
+}
+
 static uint64_t gt_get_countervalue(CPUARMState *env)
 {
     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / GTIMER_SCALE;
@@ -788,17 +841,6 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
     }
 }
 
-static int gt_cntfrq_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t *value)
-{
-    /* Not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero */
-    if (arm_current_pl(env) == 0 && !extract32(env->cp15.c14_cntkctl, 0, 2)) {
-        return EXCP_UDEF;
-    }
-    *value = env->cp15.c14_cntfrq;
-    return 0;
-}
-
 static void gt_cnt_reset(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
@@ -810,29 +852,10 @@ static void gt_cnt_reset(CPUARMState *env, const ARMCPRegInfo *ri)
 static int gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t *value)
 {
-    int timeridx = ri->opc1 & 1;
-
-    if (arm_current_pl(env) == 0 &&
-        !extract32(env->cp15.c14_cntkctl, timeridx, 1)) {
-        return EXCP_UDEF;
-    }
     *value = gt_get_countervalue(env);
     return 0;
 }
 
-static int gt_cval_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t *value)
-{
-    int timeridx = ri->opc1 & 1;
-
-    if (arm_current_pl(env) == 0 &&
-        !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
-        return EXCP_UDEF;
-    }
-    *value = env->cp15.c14_timer[timeridx].cval;
-    return 0;
-}
-
 static int gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
                          uint64_t value)
 {
@@ -847,10 +870,6 @@ static int gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
 {
     int timeridx = ri->crm & 1;
 
-    if (arm_current_pl(env) == 0 &&
-        !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
-        return EXCP_UDEF;
-    }
     *value = (uint32_t)(env->cp15.c14_timer[timeridx].cval -
                         gt_get_countervalue(env));
     return 0;
@@ -867,19 +886,6 @@ static int gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
     return 0;
 }
 
-static int gt_ctl_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                       uint64_t *value)
-{
-    int timeridx = ri->crm & 1;
-
-    if (arm_current_pl(env) == 0 &&
-        !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
-        return EXCP_UDEF;
-    }
-    *value = env->cp15.c14_timer[timeridx].ctl;
-    return 0;
-}
-
 static int gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
                         uint64_t value)
 {
@@ -924,7 +930,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
       .access = PL1_RW | PL0_R,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
       .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE,
-      .readfn = gt_cntfrq_read, .raw_readfn = raw_read,
+      .accessfn = gt_cntfrq_access,
     },
     /* overall control: mostly access permissions */
     { .name = "CNTKCTL", .cp = 15, .crn = 14, .crm = 1, .opc1 = 0, .opc2 = 0,
@@ -937,32 +943,36 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
       .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl),
       .resetvalue = 0,
-      .readfn = gt_ctl_read, .writefn = gt_ctl_write,
-      .raw_readfn = raw_read, .raw_writefn = raw_write,
+      .accessfn = gt_ptimer_access,
+      .writefn = gt_ctl_write, .raw_writefn = raw_write,
     },
     { .name = "CNTV_CTL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 1,
       .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl),
       .resetvalue = 0,
-      .readfn = gt_ctl_read, .writefn = gt_ctl_write,
-      .raw_readfn = raw_read, .raw_writefn = raw_write,
+      .accessfn = gt_vtimer_access,
+      .writefn = gt_ctl_write, .raw_writefn = raw_write,
     },
     /* TimerValue views: a 32 bit downcounting view of the underlying state */
     { .name = "CNTP_TVAL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0,
       .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .accessfn = gt_ptimer_access,
       .readfn = gt_tval_read, .writefn = gt_tval_write,
     },
     { .name = "CNTV_TVAL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 0,
       .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .accessfn = gt_vtimer_access,
       .readfn = gt_tval_read, .writefn = gt_tval_write,
     },
     /* The counter itself */
     { .name = "CNTPCT", .cp = 15, .crm = 14, .opc1 = 0,
       .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO,
+      .accessfn = gt_pct_access,
       .readfn = gt_cnt_read, .resetfn = gt_cnt_reset,
     },
     { .name = "CNTVCT", .cp = 15, .crm = 14, .opc1 = 1,
       .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO,
+      .accessfn = gt_vct_access,
       .readfn = gt_cnt_read, .resetfn = gt_cnt_reset,
     },
     /* Comparison value, indicating when the timer goes off */
@@ -971,16 +981,16 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
       .type = ARM_CP_64BIT | ARM_CP_IO,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
       .resetvalue = 0,
-      .readfn = gt_cval_read, .writefn = gt_cval_write,
-      .raw_readfn = raw_read, .raw_writefn = raw_write,
+      .accessfn = gt_ptimer_access,
+      .writefn = gt_cval_write, .raw_writefn = raw_write,
     },
     { .name = "CNTV_CVAL", .cp = 15, .crm = 14, .opc1 = 3,
       .access = PL1_RW | PL0_R,
       .type = ARM_CP_64BIT | ARM_CP_IO,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
       .resetvalue = 0,
-      .readfn = gt_cval_read, .writefn = gt_cval_write,
-      .raw_readfn = raw_read, .raw_writefn = raw_write,
+      .accessfn = gt_vtimer_access,
+      .writefn = gt_cval_write, .raw_writefn = raw_write,
     },
     REGINFO_SENTINEL
 };

From 92611c0019c38c860e6926dd2073c4448c382859 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:53 +0000
Subject: [PATCH 19/30] target-arm: Convert miscellaneous reginfo structs to
 accessfn

Convert the remaining miscellaneous cases of reginfo read/write
functions returning EXCP_UDEF to use an accessfn instead:
TEEHBR, and the ATS address-translation operations.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c | 44 +++++++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index aec052b373..00117c29ea 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -682,27 +682,12 @@ static int teecr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
     return 0;
 }
 
-static int teehbr_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                       uint64_t *value)
-{
-    /* This is a helper function because the user access rights
-     * depend on the value of the TEECR.
-     */
-    if (arm_current_pl(env) == 0 && (env->teecr & 1)) {
-        return EXCP_UDEF;
-    }
-    *value = env->teehbr;
-    return 0;
-}
-
-static int teehbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t value)
+static CPAccessResultg teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     if (arm_current_pl(env) == 0 && (env->teecr & 1)) {
-        return EXCP_UDEF;
+        return CP_ACCESS_TRAP;
     }
-    env->teehbr = value;
-    return 0;
+    return CP_ACCESS_OK;
 }
 
 static const ARMCPRegInfo t2ee_cp_reginfo[] = {
@@ -712,8 +697,7 @@ static const ARMCPRegInfo t2ee_cp_reginfo[] = {
       .writefn = teecr_write },
     { .name = "TEEHBR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 6, .opc2 = 0,
       .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, teehbr),
-      .resetvalue = 0, .raw_readfn = raw_read, .raw_writefn = raw_write,
-      .readfn = teehbr_read, .writefn = teehbr_write },
+      .accessfn = teehbr_access, .resetvalue = 0 },
     REGINFO_SENTINEL
 };
 
@@ -1031,6 +1015,19 @@ static inline bool extended_addresses_enabled(CPUARMState *env)
         && (env->cp15.c2_control & (1U << 31));
 }
 
+static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    if (ri->opc2 & 4) {
+        /* Other states are only available with TrustZone; in
+         * a non-TZ implementation these registers don't exist
+         * at all, which is an Uncategorized trap. This underdecoding
+         * is safe because the reginfo is NO_MIGRATE.
+         */
+        return CP_ACCESS_TRAP_UNCATEGORIZED;
+    }
+    return CP_ACCESS_OK;
+}
+
 static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     hwaddr phys_addr;
@@ -1039,10 +1036,6 @@ static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
     int ret, is_user = ri->opc2 & 2;
     int access_type = ri->opc2 & 1;
 
-    if (ri->opc2 & 4) {
-        /* Other states are only available with TrustZone */
-        return EXCP_UDEF;
-    }
     ret = get_phys_addr(env, value, access_type, is_user,
                         &phys_addr, &prot, &page_size);
     if (extended_addresses_enabled(env)) {
@@ -1095,7 +1088,8 @@ static const ARMCPRegInfo vapa_cp_reginfo[] = {
       .writefn = par_write },
 #ifndef CONFIG_USER_ONLY
     { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY,
-      .access = PL1_W, .writefn = ats_write, .type = ARM_CP_NO_MIGRATE },
+      .access = PL1_W, .accessfn = ats_access,
+      .writefn = ats_write, .type = ARM_CP_NO_MIGRATE },
 #endif
     REGINFO_SENTINEL
 };

From c4241c7d381086819131fba4fc8123848d83de8a Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:54 +0000
Subject: [PATCH 20/30] target-arm: Drop success/fail return from cpreg read
 and write functions

All cpreg read and write functions now return 0, so we can clean up
their prototypes:
 * write functions return void
 * read functions return the value rather than taking a pointer
   to write the value to

This is a fairly mechanical change which makes only the bare
minimum set of changes to the callers of read and write functions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
---
 hw/arm/pxa2xx.c        |  36 ++----
 hw/arm/pxa2xx_pic.c    |  11 +-
 target-arm/cpu.c       |   6 +-
 target-arm/cpu.h       |  23 ++--
 target-arm/helper.c    | 288 ++++++++++++++++-------------------------
 target-arm/op_helper.c |  28 ++--
 6 files changed, 154 insertions(+), 238 deletions(-)

diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 25ec549e71..64422f04cb 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -224,27 +224,24 @@ static const VMStateDescription vmstate_pxa2xx_cm = {
     }
 };
 
-static int pxa2xx_clkcfg_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                              uint64_t *value)
+static uint64_t pxa2xx_clkcfg_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     PXA2xxState *s = (PXA2xxState *)ri->opaque;
-    *value = s->clkcfg;
-    return 0;
+    return s->clkcfg;
 }
 
-static int pxa2xx_clkcfg_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                               uint64_t value)
+static void pxa2xx_clkcfg_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                uint64_t value)
 {
     PXA2xxState *s = (PXA2xxState *)ri->opaque;
     s->clkcfg = value & 0xf;
     if (value & 2) {
         printf("%s: CPU frequency change attempt\n", __func__);
     }
-    return 0;
 }
 
-static int pxa2xx_pwrmode_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                uint64_t value)
+static void pxa2xx_pwrmode_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 uint64_t value)
 {
     PXA2xxState *s = (PXA2xxState *)ri->opaque;
     static const char *pwrmode[8] = {
@@ -310,36 +307,29 @@ static int pxa2xx_pwrmode_write(CPUARMState *env, const ARMCPRegInfo *ri,
         printf("%s: machine entered %s mode\n", __func__,
                pwrmode[value & 7]);
     }
-
-    return 0;
 }
 
-static int pxa2xx_cppmnc_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                              uint64_t *value)
+static uint64_t pxa2xx_cppmnc_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     PXA2xxState *s = (PXA2xxState *)ri->opaque;
-    *value = s->pmnc;
-    return 0;
+    return s->pmnc;
 }
 
-static int pxa2xx_cppmnc_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                               uint64_t value)
+static void pxa2xx_cppmnc_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                uint64_t value)
 {
     PXA2xxState *s = (PXA2xxState *)ri->opaque;
     s->pmnc = value;
-    return 0;
 }
 
-static int pxa2xx_cpccnt_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                              uint64_t *value)
+static uint64_t pxa2xx_cpccnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     PXA2xxState *s = (PXA2xxState *)ri->opaque;
     if (s->pmnc & 1) {
-        *value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     } else {
-        *value = 0;
+        return 0;
     }
-    return 0;
 }
 
 static const ARMCPRegInfo pxa_cp_reginfo[] = {
diff --git a/hw/arm/pxa2xx_pic.c b/hw/arm/pxa2xx_pic.c
index 46d337cf84..345fa4a491 100644
--- a/hw/arm/pxa2xx_pic.c
+++ b/hw/arm/pxa2xx_pic.c
@@ -217,20 +217,17 @@ static const int pxa2xx_cp_reg_map[0x10] = {
     [0xa] = ICPR2,
 };
 
-static int pxa2xx_pic_cp_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                              uint64_t *value)
+static uint64_t pxa2xx_pic_cp_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     int offset = pxa2xx_cp_reg_map[ri->crn];
-    *value = pxa2xx_pic_mem_read(ri->opaque, offset, 4);
-    return 0;
+    return pxa2xx_pic_mem_read(ri->opaque, offset, 4);
 }
 
-static int pxa2xx_pic_cp_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                               uint64_t value)
+static void pxa2xx_pic_cp_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                uint64_t value)
 {
     int offset = pxa2xx_cp_reg_map[ri->crn];
     pxa2xx_pic_mem_write(ri->opaque, offset, value, 4);
-    return 0;
 }
 
 #define REGINFO_FOR_PIC_CP(NAME, CRN) \
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 82943428bc..6e7ce8905e 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -681,14 +681,12 @@ static void cortex_a9_initfn(Object *obj)
 }
 
 #ifndef CONFIG_USER_ONLY
-static int a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                           uint64_t *value)
+static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     /* Linux wants the number of processors from here.
      * Might as well set the interrupt-controller bit too.
      */
-    *value = ((smp_cpus - 1) << 24) | (1 << 23);
-    return 0;
+    return ((smp_cpus - 1) << 24) | (1 << 23);
 }
 #endif
 
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index cc3d0ac9f2..3c8a2dba2f 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -827,11 +827,12 @@ typedef enum CPAccessResult {
     CP_ACCESS_TRAP_UNCATEGORIZED = 2,
 } CPAccessResult;
 
-/* Access functions for coprocessor registers. These should always succeed. */
-typedef int CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque,
-                     uint64_t *value);
-typedef int CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque,
-                      uint64_t value);
+/* Access functions for coprocessor registers. These cannot fail and
+ * may not raise exceptions.
+ */
+typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque);
+typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque,
+                       uint64_t value);
 /* Access permission check functions for coprocessor registers. */
 typedef CPAccessResult CPAccessFn(CPUARMState *env, const ARMCPRegInfo *opaque);
 /* Hook function for register reset */
@@ -906,14 +907,14 @@ struct ARMCPRegInfo {
     /* Function for doing a "raw" read; used when we need to copy
      * coprocessor state to the kernel for KVM or out for
      * migration. This only needs to be provided if there is also a
-     * readfn and it makes an access permission check.
+     * readfn and it has side effects (for instance clear-on-read bits).
      */
     CPReadFn *raw_readfn;
     /* Function for doing a "raw" write; used when we need to copy KVM
      * kernel coprocessor state into userspace, or for inbound
      * migration. This only needs to be provided if there is also a
-     * writefn and it makes an access permission check or masks out
-     * "unwritable" bits or has write-one-to-clear or similar behaviour.
+     * writefn and it masks out "unwritable" bits or has write-one-to-clear
+     * or similar behaviour.
      */
     CPWriteFn *raw_writefn;
     /* Function for resetting the register. If NULL, then reset will be done
@@ -948,10 +949,10 @@ static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
 const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);
 
 /* CPWriteFn that can be used to implement writes-ignored behaviour */
-int arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t value);
+void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value);
 /* CPReadFn that can be used for read-as-zero behaviour */
-int arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value);
+uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri);
 
 /* CPResetFn that does nothing, for use if no reset is required even
  * if fieldoffset is non zero.
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 00117c29ea..f04e15eecd 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -107,26 +107,23 @@ static int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg)
     }
 }
 
-static int raw_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                    uint64_t *value)
+static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     if (ri->type & ARM_CP_64BIT) {
-        *value = CPREG_FIELD64(env, ri);
+        return CPREG_FIELD64(env, ri);
     } else {
-        *value = CPREG_FIELD32(env, ri);
+        return CPREG_FIELD32(env, ri);
     }
-    return 0;
 }
 
-static int raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                     uint64_t value)
+static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                      uint64_t value)
 {
     if (ri->type & ARM_CP_64BIT) {
         CPREG_FIELD64(env, ri) = value;
     } else {
         CPREG_FIELD32(env, ri) = value;
     }
-    return 0;
 }
 
 static bool read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -138,11 +135,11 @@ static bool read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
     if (ri->type & ARM_CP_CONST) {
         *v = ri->resetvalue;
     } else if (ri->raw_readfn) {
-        return (ri->raw_readfn(env, ri, v) == 0);
+        *v = ri->raw_readfn(env, ri);
     } else if (ri->readfn) {
-        return (ri->readfn(env, ri, v) == 0);
+        *v = ri->readfn(env, ri);
     } else {
-        raw_read(env, ri, v);
+        *v = raw_read(env, ri);
     }
     return true;
 }
@@ -159,9 +156,9 @@ static bool write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
     if (ri->type & ARM_CP_CONST) {
         return true;
     } else if (ri->raw_writefn) {
-        return (ri->raw_writefn(env, ri, v) == 0);
+        ri->raw_writefn(env, ri, v);
     } else if (ri->writefn) {
-        return (ri->writefn(env, ri, v) == 0);
+        ri->writefn(env, ri, v);
     } else {
         raw_write(env, ri, v);
     }
@@ -309,14 +306,13 @@ void init_cpreg_list(ARMCPU *cpu)
     g_list_free(keys);
 }
 
-static int dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     env->cp15.c3 = value;
     tlb_flush(env, 1); /* Flush TLB as domain not tracked in TLB */
-    return 0;
 }
 
-static int fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     if (env->cp15.c13_fcse != value) {
         /* Unlike real hardware the qemu TLB uses virtual addresses,
@@ -325,10 +321,10 @@ static int fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
         tlb_flush(env, 1);
         env->cp15.c13_fcse = value;
     }
-    return 0;
 }
-static int contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                            uint64_t value)
+
+static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
 {
     if (env->cp15.c13_context != value && !arm_feature(env, ARM_FEATURE_MPU)) {
         /* For VMSA (when not using the LPAE long descriptor page table
@@ -338,39 +334,34 @@ static int contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
         tlb_flush(env, 1);
     }
     env->cp15.c13_context = value;
-    return 0;
 }
 
-static int tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                         uint64_t value)
+static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
 {
     /* Invalidate all (TLBIALL) */
     tlb_flush(env, 1);
-    return 0;
 }
 
-static int tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                         uint64_t value)
+static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
 {
     /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
     tlb_flush_page(env, value & TARGET_PAGE_MASK);
-    return 0;
 }
 
-static int tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t value)
+static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                           uint64_t value)
 {
     /* Invalidate by ASID (TLBIASID) */
     tlb_flush(env, value == 0);
-    return 0;
 }
 
-static int tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t value)
+static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                           uint64_t value)
 {
     /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
     tlb_flush_page(env, value & TARGET_PAGE_MASK);
-    return 0;
 }
 
 static const ARMCPRegInfo cp_reginfo[] = {
@@ -450,14 +441,14 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static int cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
 {
     if (env->cp15.c1_coproc != value) {
         env->cp15.c1_coproc = value;
         /* ??? Is this safe when called from within a TB?  */
         tb_flush(env);
     }
-    return 0;
 }
 
 static const ARMCPRegInfo v6_cp_reginfo[] = {
@@ -496,89 +487,77 @@ static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri)
     return CP_ACCESS_OK;
 }
 
-static int pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                      uint64_t value)
+static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                       uint64_t value)
 {
     /* only the DP, X, D and E bits are writable */
     env->cp15.c9_pmcr &= ~0x39;
     env->cp15.c9_pmcr |= (value & 0x39);
-    return 0;
 }
 
-static int pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
+static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
     value &= (1 << 31);
     env->cp15.c9_pmcnten |= value;
-    return 0;
 }
 
-static int pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                            uint64_t value)
+static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
 {
     value &= (1 << 31);
     env->cp15.c9_pmcnten &= ~value;
-    return 0;
 }
 
-static int pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t value)
+static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
 {
     env->cp15.c9_pmovsr &= ~value;
-    return 0;
 }
 
-static int pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                            uint64_t value)
+static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
 {
     env->cp15.c9_pmxevtyper = value & 0xff;
-    return 0;
 }
 
-static int pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
     env->cp15.c9_pmuserenr = value & 1;
-    return 0;
 }
 
-static int pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                            uint64_t value)
+static void pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
 {
     /* We have no event counters so only the C bit can be changed */
     value &= (1 << 31);
     env->cp15.c9_pminten |= value;
-    return 0;
 }
 
-static int pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                            uint64_t value)
+static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
 {
     value &= (1 << 31);
     env->cp15.c9_pminten &= ~value;
-    return 0;
 }
 
-static int vbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                      uint64_t value)
+static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                       uint64_t value)
 {
     env->cp15.c12_vbar = value & ~0x1Ful;
-    return 0;
 }
 
-static int ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                       uint64_t *value)
+static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
-    *value = cpu->ccsidr[env->cp15.c0_cssel];
-    return 0;
+    return cpu->ccsidr[env->cp15.c0_cssel];
 }
 
-static int csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t value)
+static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
 {
     env->cp15.c0_cssel = value & 0xf;
-    return 0;
 }
 
 static const ARMCPRegInfo v7_cp_reginfo[] = {
@@ -675,14 +654,14 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static int teecr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
 {
     value &= 1;
     env->teecr = value;
-    return 0;
 }
 
-static CPAccessResultg teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri)
+static CPAccessResult teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     if (arm_current_pl(env) == 0 && (env->teecr & 1)) {
         return CP_ACCESS_TRAP;
@@ -833,45 +812,40 @@ static void gt_cnt_reset(CPUARMState *env, const ARMCPRegInfo *ri)
     timer_del(cpu->gt_timer[timeridx]);
 }
 
-static int gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                       uint64_t *value)
+static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    *value = gt_get_countervalue(env);
-    return 0;
+    return gt_get_countervalue(env);
 }
 
-static int gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                         uint64_t value)
+static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
 {
     int timeridx = ri->opc1 & 1;
 
     env->cp15.c14_timer[timeridx].cval = value;
     gt_recalc_timer(arm_env_get_cpu(env), timeridx);
-    return 0;
 }
-static int gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t *value)
+
+static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     int timeridx = ri->crm & 1;
 
-    *value = (uint32_t)(env->cp15.c14_timer[timeridx].cval -
-                        gt_get_countervalue(env));
-    return 0;
+    return (uint32_t)(env->cp15.c14_timer[timeridx].cval -
+                      gt_get_countervalue(env));
 }
 
-static int gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                         uint64_t value)
+static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
 {
     int timeridx = ri->crm & 1;
 
     env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) +
         + sextract64(value, 0, 32);
     gt_recalc_timer(arm_env_get_cpu(env), timeridx);
-    return 0;
 }
 
-static int gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t value)
+static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
     int timeridx = ri->crm & 1;
@@ -888,7 +862,6 @@ static int gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
         qemu_set_irq(cpu->gt_timer_outputs[timeridx],
                      (oldval & 4) && (value & 2));
     }
-    return 0;
 }
 
 void arm_gt_ptimer_cb(void *opaque)
@@ -990,7 +963,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
 
 #endif
 
-static int par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     if (arm_feature(env, ARM_FEATURE_LPAE)) {
         env->cp15.c7_par = value;
@@ -999,7 +972,6 @@ static int par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
     } else {
         env->cp15.c7_par = value & 0xfffff1ff;
     }
-    return 0;
 }
 
 #ifndef CONFIG_USER_ONLY
@@ -1028,7 +1000,7 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri)
     return CP_ACCESS_OK;
 }
 
-static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     hwaddr phys_addr;
     target_ulong page_size;
@@ -1077,7 +1049,6 @@ static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
         }
         env->cp15.c7_par_hi = 0;
     }
-    return 0;
 }
 #endif
 
@@ -1124,32 +1095,26 @@ static uint32_t extended_mpu_ap_bits(uint32_t val)
     return ret;
 }
 
-static int pmsav5_data_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                uint64_t value)
+static void pmsav5_data_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 uint64_t value)
 {
     env->cp15.c5_data = extended_mpu_ap_bits(value);
-    return 0;
 }
 
-static int pmsav5_data_ap_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                               uint64_t *value)
+static uint64_t pmsav5_data_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    *value = simple_mpu_ap_bits(env->cp15.c5_data);
-    return 0;
+    return simple_mpu_ap_bits(env->cp15.c5_data);
 }
 
-static int pmsav5_insn_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                uint64_t value)
+static void pmsav5_insn_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 uint64_t value)
 {
     env->cp15.c5_insn = extended_mpu_ap_bits(value);
-    return 0;
 }
 
-static int pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                               uint64_t *value)
+static uint64_t pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    *value = simple_mpu_ap_bits(env->cp15.c5_insn);
-    return 0;
+    return simple_mpu_ap_bits(env->cp15.c5_insn);
 }
 
 static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
@@ -1201,8 +1166,8 @@ static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static int vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                uint64_t value)
+static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 uint64_t value)
 {
     int maskshift = extract32(value, 0, 3);
 
@@ -1219,11 +1184,10 @@ static int vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
     env->cp15.c2_control = value;
     env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> maskshift);
     env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> maskshift);
-    return 0;
 }
 
-static int vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                            uint64_t value)
+static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
 {
     if (arm_feature(env, ARM_FEATURE_LPAE)) {
         /* With LPAE the TTBCR could result in a change of ASID
@@ -1231,7 +1195,7 @@ static int vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
          */
         tlb_flush(env, 1);
     }
-    return vmsa_ttbcr_raw_write(env, ri, value);
+    vmsa_ttbcr_raw_write(env, ri, value);
 }
 
 static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1264,40 +1228,36 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static int omap_ticonfig_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                               uint64_t value)
+static void omap_ticonfig_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                uint64_t value)
 {
     env->cp15.c15_ticonfig = value & 0xe7;
     /* The OS_TYPE bit in this register changes the reported CPUID! */
     env->cp15.c0_cpuid = (value & (1 << 5)) ?
         ARM_CPUID_TI915T : ARM_CPUID_TI925T;
-    return 0;
 }
 
-static int omap_threadid_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                               uint64_t value)
+static void omap_threadid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                uint64_t value)
 {
     env->cp15.c15_threadid = value & 0xffff;
-    return 0;
 }
 
-static int omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t value)
+static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                           uint64_t value)
 {
     /* Wait-for-interrupt (deprecated) */
     cpu_interrupt(CPU(arm_env_get_cpu(env)), CPU_INTERRUPT_HALT);
-    return 0;
 }
 
-static int omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                 uint64_t value)
+static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                  uint64_t value)
 {
     /* On OMAP there are registers indicating the max/min index of dcache lines
      * containing a dirty line; cache flush operations have to reset these.
      */
     env->cp15.c15_i_max = 0x000;
     env->cp15.c15_i_min = 0xff0;
-    return 0;
 }
 
 static const ARMCPRegInfo omap_cp_reginfo[] = {
@@ -1339,8 +1299,8 @@ static const ARMCPRegInfo omap_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static int xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                             uint64_t value)
+static void xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
 {
     value &= 0x3fff;
     if (env->cp15.c15_cpar != value) {
@@ -1348,7 +1308,6 @@ static int xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri,
         tb_flush(env);
         env->cp15.c15_cpar = value;
     }
-    return 0;
 }
 
 static const ARMCPRegInfo xscale_cp_reginfo[] = {
@@ -1428,8 +1387,7 @@ static const ARMCPRegInfo strongarm_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static int mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                      uint64_t *value)
+static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
     uint32_t mpidr = cs->cpu_index;
@@ -1444,8 +1402,7 @@ static int mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri,
          * not currently model any of those cores.
          */
     }
-    *value = mpidr;
-    return 0;
+    return mpidr;
 }
 
 static const ARMCPRegInfo mpidr_cp_reginfo[] = {
@@ -1454,17 +1411,16 @@ static const ARMCPRegInfo mpidr_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static int par64_read(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value)
+static uint64_t par64_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    *value = ((uint64_t)env->cp15.c7_par_hi << 32) | env->cp15.c7_par;
-    return 0;
+    return ((uint64_t)env->cp15.c7_par_hi << 32) | env->cp15.c7_par;
 }
 
-static int par64_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void par64_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
 {
     env->cp15.c7_par_hi = value >> 32;
     env->cp15.c7_par = value;
-    return 0;
 }
 
 static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1473,27 +1429,24 @@ static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri)
     env->cp15.c7_par = 0;
 }
 
-static int ttbr064_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t *value)
+static uint64_t ttbr064_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    *value = ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
-    return 0;
+    return ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
 }
 
-static int ttbr064_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                             uint64_t value)
+static void ttbr064_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
 {
     env->cp15.c2_base0_hi = value >> 32;
     env->cp15.c2_base0 = value;
-    return 0;
 }
 
-static int ttbr064_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                         uint64_t value)
+static void ttbr064_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
 {
     /* Writes to the 64 bit format TTBRs may change the ASID */
     tlb_flush(env, 1);
-    return ttbr064_raw_write(env, ri, value);
+    ttbr064_raw_write(env, ri, value);
 }
 
 static void ttbr064_reset(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1502,19 +1455,16 @@ static void ttbr064_reset(CPUARMState *env, const ARMCPRegInfo *ri)
     env->cp15.c2_base0 = 0;
 }
 
-static int ttbr164_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t *value)
+static uint64_t ttbr164_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    *value = ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
-    return 0;
+    return ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
 }
 
-static int ttbr164_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                         uint64_t value)
+static void ttbr164_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
 {
     env->cp15.c2_base1_hi = value >> 32;
     env->cp15.c2_base1 = value;
-    return 0;
 }
 
 static void ttbr164_reset(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1551,32 +1501,26 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static int aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t *value)
+static uint64_t aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    *value = vfp_get_fpcr(env);
-    return 0;
+    return vfp_get_fpcr(env);
 }
 
-static int aa64_fpcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                           uint64_t value)
+static void aa64_fpcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
 {
     vfp_set_fpcr(env, value);
-    return 0;
 }
 
-static int aa64_fpsr_read(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t *value)
+static uint64_t aa64_fpsr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    *value = vfp_get_fpsr(env);
-    return 0;
+    return vfp_get_fpsr(env);
 }
 
-static int aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                           uint64_t value)
+static void aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
 {
     vfp_set_fpsr(env, value);
-    return 0;
 }
 
 static const ARMCPRegInfo v8_cp_reginfo[] = {
@@ -1609,13 +1553,13 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static int sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
 {
     env->cp15.c1_sys = value;
     /* ??? Lots of these bits are not implemented.  */
     /* This may enable/disable the MMU, so do a TLB flush.  */
     tlb_flush(env, 1);
-    return 0;
 }
 
 void register_cp_regs_for_features(ARMCPU *cpu)
@@ -2193,17 +2137,15 @@ const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp)
     return g_hash_table_lookup(cpregs, &encoded_cp);
 }
 
-int arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t value)
+void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
 {
     /* Helper coprocessor write function for write-ignore registers */
-    return 0;
 }
 
-int arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value)
+uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     /* Helper coprocessor write function for read-as-zero registers */
-    *value = 0;
     return 0;
 }
 
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 34c5e7f9fb..eb0fccd98f 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -294,41 +294,29 @@ void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip)
 void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value)
 {
     const ARMCPRegInfo *ri = rip;
-    int excp = ri->writefn(env, ri, value);
-    if (excp) {
-        raise_exception(env, excp);
-    }
+
+    ri->writefn(env, ri, value);
 }
 
 uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip)
 {
     const ARMCPRegInfo *ri = rip;
-    uint64_t value;
-    int excp = ri->readfn(env, ri, &value);
-    if (excp) {
-        raise_exception(env, excp);
-    }
-    return value;
+
+    return ri->readfn(env, ri);
 }
 
 void HELPER(set_cp_reg64)(CPUARMState *env, void *rip, uint64_t value)
 {
     const ARMCPRegInfo *ri = rip;
-    int excp = ri->writefn(env, ri, value);
-    if (excp) {
-        raise_exception(env, excp);
-    }
+
+    ri->writefn(env, ri, value);
 }
 
 uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
 {
     const ARMCPRegInfo *ri = rip;
-    uint64_t value;
-    int excp = ri->readfn(env, ri, &value);
-    if (excp) {
-        raise_exception(env, excp);
-    }
-    return value;
+
+    return ri->readfn(env, ri);
 }
 
 /* ??? Flag setting arithmetic is awkward because we need to do comparisons.

From ea4571eb8768c39bf6cacd64fdcb6ef405b18b18 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:54 +0000
Subject: [PATCH 21/30] target-arm: Remove unnecessary code now read/write fns
 can't fail

Now that cpreg read and write functions can't fail and throw an
exception, we can remove the code from the translator that synchronises
the guest PC in case an exception is thrown.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/translate-a64.c | 2 --
 target-arm/translate.c     | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index e70d14fe3e..ef305e3fa2 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1231,7 +1231,6 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
         } else if (ri->readfn) {
             TCGv_ptr tmpptr;
-            gen_a64_set_pc_im(s->pc - 4);
             tmpptr = tcg_const_ptr(ri);
             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
             tcg_temp_free_ptr(tmpptr);
@@ -1244,7 +1243,6 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
             return;
         } else if (ri->writefn) {
             TCGv_ptr tmpptr;
-            gen_a64_set_pc_im(s->pc - 4);
             tmpptr = tcg_const_ptr(ri);
             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
             tcg_temp_free_ptr(tmpptr);
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 0805053aaf..6ccf0ba482 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -6876,7 +6876,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     tmp64 = tcg_const_i64(ri->resetvalue);
                 } else if (ri->readfn) {
                     TCGv_ptr tmpptr;
-                    gen_set_pc_im(s, s->pc);
                     tmp64 = tcg_temp_new_i64();
                     tmpptr = tcg_const_ptr(ri);
                     gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
@@ -6899,7 +6898,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     tmp = tcg_const_i32(ri->resetvalue);
                 } else if (ri->readfn) {
                     TCGv_ptr tmpptr;
-                    gen_set_pc_im(s, s->pc);
                     tmp = tcg_temp_new_i32();
                     tmpptr = tcg_const_ptr(ri);
                     gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
@@ -6934,7 +6932,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                 tcg_temp_free_i32(tmphi);
                 if (ri->writefn) {
                     TCGv_ptr tmpptr = tcg_const_ptr(ri);
-                    gen_set_pc_im(s, s->pc);
                     gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
                     tcg_temp_free_ptr(tmpptr);
                 } else {
@@ -6945,7 +6942,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                 if (ri->writefn) {
                     TCGv_i32 tmp;
                     TCGv_ptr tmpptr;
-                    gen_set_pc_im(s, s->pc);
                     tmp = load_reg(s, rt);
                     tmpptr = tcg_const_ptr(ri);
                     gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);

From 59a1c327d72683e80fa1b886d42a3a5c17729484 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:54 +0000
Subject: [PATCH 22/30] target-arm: Remove failure status return from
 read/write_raw_cp_reg

The read_raw_cp_reg and write_raw_cp_reg functions can now never
fail (in fact they should never have failed previously unless
there was a bug in a reginfo that meant no raw accessor was
provided for a might-trap register). This allows us to clean up
their prototypes so the write function returns void and the
read function returns the value read, which in turn lets us
simplify the callers.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
---
 target-arm/helper.c | 36 ++++++++++++------------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index f04e15eecd..59db883023 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -126,35 +126,30 @@ static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
     }
 }
 
-static bool read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
-                            uint64_t *v)
+static uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    /* Raw read of a coprocessor register (as needed for migration, etc)
-     * return true on success, false if the read is impossible for some reason.
-     */
+    /* Raw read of a coprocessor register (as needed for migration, etc). */
     if (ri->type & ARM_CP_CONST) {
-        *v = ri->resetvalue;
+        return ri->resetvalue;
     } else if (ri->raw_readfn) {
-        *v = ri->raw_readfn(env, ri);
+        return ri->raw_readfn(env, ri);
     } else if (ri->readfn) {
-        *v = ri->readfn(env, ri);
+        return ri->readfn(env, ri);
     } else {
-        *v = raw_read(env, ri);
+        return raw_read(env, ri);
     }
-    return true;
 }
 
-static bool write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
+static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
                              int64_t v)
 {
     /* Raw write of a coprocessor register (as needed for migration, etc).
-     * Return true on success, false if the write is impossible for some reason.
      * Note that constant registers are treated as write-ignored; the
      * caller should check for success by whether a readback gives the
      * value written.
      */
     if (ri->type & ARM_CP_CONST) {
-        return true;
+        return;
     } else if (ri->raw_writefn) {
         ri->raw_writefn(env, ri, v);
     } else if (ri->writefn) {
@@ -162,7 +157,6 @@ static bool write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
     } else {
         raw_write(env, ri, v);
     }
-    return true;
 }
 
 bool write_cpustate_to_list(ARMCPU *cpu)
@@ -174,7 +168,7 @@ bool write_cpustate_to_list(ARMCPU *cpu)
     for (i = 0; i < cpu->cpreg_array_len; i++) {
         uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
         const ARMCPRegInfo *ri;
-        uint64_t v;
+
         ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
         if (!ri) {
             ok = false;
@@ -183,11 +177,7 @@ bool write_cpustate_to_list(ARMCPU *cpu)
         if (ri->type & ARM_CP_NO_MIGRATE) {
             continue;
         }
-        if (!read_raw_cp_reg(&cpu->env, ri, &v)) {
-            ok = false;
-            continue;
-        }
-        cpu->cpreg_values[i] = v;
+        cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri);
     }
     return ok;
 }
@@ -200,7 +190,6 @@ bool write_list_to_cpustate(ARMCPU *cpu)
     for (i = 0; i < cpu->cpreg_array_len; i++) {
         uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
         uint64_t v = cpu->cpreg_values[i];
-        uint64_t readback;
         const ARMCPRegInfo *ri;
 
         ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
@@ -215,9 +204,8 @@ bool write_list_to_cpustate(ARMCPU *cpu)
          * (to catch read-only registers and partially read-only
          * registers where the incoming migration value doesn't match)
          */
-        if (!write_raw_cp_reg(&cpu->env, ri, v) ||
-            !read_raw_cp_reg(&cpu->env, ri, &readback) ||
-            readback != v) {
+        write_raw_cp_reg(&cpu->env, ri, v);
+        if (read_raw_cp_reg(&cpu->env, ri) != v) {
             ok = false;
         }
     }

From 7900e9f1f963184967594f4518c97ea682a9605f Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:54 +0000
Subject: [PATCH 23/30] target-arm: Fix incorrect type for value argument to
 write_raw_cp_reg

The write_raw_cp_reg's value argument should be a uint64_t, since
that's what all its callers hand it and what all the functions it
calls take. A (harmless) typo meant we were accidentally declaring
it as int64_t.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
---
 target-arm/helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 59db883023..1b111b6e85 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -141,7 +141,7 @@ static uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri)
 }
 
 static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
-                             int64_t v)
+                             uint64_t v)
 {
     /* Raw write of a coprocessor register (as needed for migration, etc).
      * Note that constant registers are treated as write-ignored; the

From d324b36ad93b84509aeee41f05ee17a6c5de7826 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:55 +0000
Subject: [PATCH 24/30] target-arm: A64: Implement store-exclusive for system
 mode

System mode store-exclusive use a different code path to usermode ones;
implement this missing code, in a similar way to the 32 bit version.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 66 +++++++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 5 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index ef305e3fa2..3de9cad367 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1487,12 +1487,68 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
 }
 #else
 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
-                                TCGv_i64 addr, int size, int is_pair)
+                                TCGv_i64 inaddr, int size, int is_pair)
 {
-    qemu_log_mask(LOG_UNIMP,
-                  "%s:%d: system mode store_exclusive unsupported "
-                  "at pc=%016" PRIx64 "\n",
-                  __FILE__, __LINE__, s->pc - 4);
+    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
+     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
+     *     [addr] = {Rt};
+     *     if (is_pair) {
+     *         [addr + datasize] = {Rt2};
+     *     }
+     *     {Rd} = 0;
+     * } else {
+     *     {Rd} = 1;
+     * }
+     * env->exclusive_addr = -1;
+     */
+    int fail_label = gen_new_label();
+    int done_label = gen_new_label();
+    TCGv_i64 addr = tcg_temp_local_new_i64();
+    TCGv_i64 tmp;
+
+    /* Copy input into a local temp so it is not trashed when the
+     * basic block ends at the branch insn.
+     */
+    tcg_gen_mov_i64(addr, inaddr);
+    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
+
+    tmp = tcg_temp_new_i64();
+    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
+    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
+    tcg_temp_free_i64(tmp);
+
+    if (is_pair) {
+        TCGv_i64 addrhi = tcg_temp_new_i64();
+        TCGv_i64 tmphi = tcg_temp_new_i64();
+
+        tcg_gen_addi_i64(addrhi, addr, 1 << size);
+        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
+        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);
+
+        tcg_temp_free_i64(tmphi);
+        tcg_temp_free_i64(addrhi);
+    }
+
+    /* We seem to still have the exclusive monitor, so do the store */
+    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
+    if (is_pair) {
+        TCGv_i64 addrhi = tcg_temp_new_i64();
+
+        tcg_gen_addi_i64(addrhi, addr, 1 << size);
+        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
+                            get_mem_index(s), MO_TE + size);
+        tcg_temp_free_i64(addrhi);
+    }
+
+    tcg_temp_free_i64(addr);
+
+    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
+    tcg_gen_br(done_label);
+    gen_set_label(fail_label);
+    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
+    gen_set_label(done_label);
+    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+
 }
 #endif
 

From 13caf1fd2bdc4d8799098d73001caf7d8924073e Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:55 +0000
Subject: [PATCH 25/30] target-arm: A64: Add opcode comments to
 disas_simd_three_reg_diff

The opcode switch in disas_simd_three_reg_diff() is missing the
customary comments indicating which cases correspond to which
instructions. Add them.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 3de9cad367..d4e7a20617 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7091,24 +7091,24 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
         /* 128 x 128 -> 64 */
         unsupported_encoding(s, insn);
         break;
-    case 9:
-    case 11:
-    case 13:
-    case 14:
+    case 9: /* SQDMLAL, SQDMLAL2 */
+    case 11: /* SQDMLSL, SQDMLSL2 */
+    case 13: /* SQDMULL, SQDMULL2 */
+    case 14: /* PMULL, PMULL2 */
         if (is_u) {
             unallocated_encoding(s);
             return;
         }
         /* fall through */
-    case 0:
-    case 2:
+    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
         unsupported_encoding(s, insn);
         break;
-    case 5:
-    case 7:
-    case 8:
-    case 10:
-    case 12:
+    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
+    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
+    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
         /* 64 x 64 -> 128 */
         if (size == 3) {
             unallocated_encoding(s);

From 70d7f984a0a9e03c1327487aed9877a4eca0e709 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:55 +0000
Subject: [PATCH 26/30] target-arm: A64: Add most remaining three-reg-diff
 widening ops

Add the remainder of the 64x64->128 operations in the three-reg-diff
category except for PMULL, PMULL2.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 109 ++++++++++++++++++++++++++++++-------
 1 file changed, 88 insertions(+), 21 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index d4e7a20617..809fbdbe93 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -75,8 +75,10 @@ typedef struct AArch64DecodeTable {
 /* Function prototype for gen_ functions for calling Neon helpers */
 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
 
@@ -6879,6 +6881,24 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
     }
 }
 
+/* Generate code to do a "long" addition or subtraction, ie one done in
+ * TCGv_i64 on vector lanes twice the width specified by size.
+ */
+static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
+                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
+{
+    static NeonGenTwo64OpFn * const fns[3][2] = {
+        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
+        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
+        { tcg_gen_add_i64, tcg_gen_sub_i64 },
+    };
+    NeonGenTwo64OpFn *genfn;
+    assert(size < 3);
+
+    genfn = fns[size][is_sub];
+    genfn(tcg_res, tcg_op1, tcg_op2);
+}
+
 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                 int opcode, int rd, int rn, int rm)
 {
@@ -6934,6 +6954,12 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
             }
 
             switch (opcode) {
+            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
+                break;
+            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
+                break;
             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
             {
@@ -6954,15 +6980,31 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                 break;
+            case 9: /* SQDMLAL, SQDMLAL2 */
+            case 11: /* SQDMLSL, SQDMLSL2 */
+            case 13: /* SQDMULL, SQDMULL2 */
+                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
+                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+                                                  tcg_passres, tcg_passres);
+                break;
             default:
                 g_assert_not_reached();
             }
 
-            if (accop > 0) {
+            if (opcode == 9 || opcode == 11) {
+                /* saturating accumulate ops */
+                if (accop < 0) {
+                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
+                }
+                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+                                                  tcg_res[pass], tcg_passres);
+            } else if (accop > 0) {
                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
-                tcg_temp_free_i64(tcg_passres);
             } else if (accop < 0) {
                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+            }
+
+            if (accop != 0) {
                 tcg_temp_free_i64(tcg_passres);
             }
 
@@ -6987,6 +7029,23 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
             }
 
             switch (opcode) {
+            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+            {
+                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
+                static NeonGenWidenFn * const widenfns[2][2] = {
+                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
+                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
+                };
+                NeonGenWidenFn *widenfn = widenfns[size][is_u];
+
+                widenfn(tcg_op2_64, tcg_op2);
+                widenfn(tcg_passres, tcg_op1);
+                gen_neon_addl(size, (opcode == 2), tcg_passres,
+                              tcg_passres, tcg_op2_64);
+                tcg_temp_free_i64(tcg_op2_64);
+                break;
+            }
             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                 if (size == 0) {
@@ -7020,28 +7079,32 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                     }
                 }
                 break;
+            case 9: /* SQDMLAL, SQDMLAL2 */
+            case 11: /* SQDMLSL, SQDMLSL2 */
+            case 13: /* SQDMULL, SQDMULL2 */
+                assert(size == 1);
+                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
+                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+                                                  tcg_passres, tcg_passres);
+                break;
             default:
                 g_assert_not_reached();
             }
             tcg_temp_free_i32(tcg_op1);
             tcg_temp_free_i32(tcg_op2);
 
-            if (accop > 0) {
-                if (size == 0) {
-                    gen_helper_neon_addl_u16(tcg_res[pass], tcg_res[pass],
-                                             tcg_passres);
+            if (accop != 0) {
+                if (opcode == 9 || opcode == 11) {
+                    /* saturating accumulate ops */
+                    if (accop < 0) {
+                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+                    }
+                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+                                                      tcg_res[pass],
+                                                      tcg_passres);
                 } else {
-                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
-                                             tcg_passres);
-                }
-                tcg_temp_free_i64(tcg_passres);
-            } else if (accop < 0) {
-                if (size == 0) {
-                    gen_helper_neon_subl_u16(tcg_res[pass], tcg_res[pass],
-                                             tcg_passres);
-                } else {
-                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
-                                             tcg_passres);
+                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
+                                  tcg_res[pass], tcg_passres);
                 }
                 tcg_temp_free_i64(tcg_passres);
             }
@@ -7091,19 +7154,23 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
         /* 128 x 128 -> 64 */
         unsupported_encoding(s, insn);
         break;
+    case 14: /* PMULL, PMULL2 */
+        if (is_u || size == 1 || size == 2) {
+            unallocated_encoding(s);
+            return;
+        }
+        unsupported_encoding(s, insn);
+        break;
     case 9: /* SQDMLAL, SQDMLAL2 */
     case 11: /* SQDMLSL, SQDMLSL2 */
     case 13: /* SQDMULL, SQDMULL2 */
-    case 14: /* PMULL, PMULL2 */
-        if (is_u) {
+        if (is_u || size == 0) {
             unallocated_encoding(s);
             return;
         }
         /* fall through */
     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
-        unsupported_encoding(s, insn);
-        break;
     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */

From dfc15c7cebaa467de7ada96d2688d4f5f4d3e532 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:56 +0000
Subject: [PATCH 27/30] target-arm: A64: Implement the wide 3-reg-different
 operations

Implement the wide three-reg-different operations:
SADDW, UADDW, SSUBW and USUBW.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 41 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 809fbdbe93..511f15e588 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7117,6 +7117,41 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
     tcg_temp_free_i64(tcg_res[1]);
 }
 
+static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
+                            int opcode, int rd, int rn, int rm)
+{
+    TCGv_i64 tcg_res[2];
+    int part = is_q ? 2 : 0;
+    int pass;
+
+    for (pass = 0; pass < 2; pass++) {
+        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
+        static NeonGenWidenFn * const widenfns[3][2] = {
+            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
+            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
+            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
+        };
+        NeonGenWidenFn *widenfn = widenfns[size][is_u];
+
+        read_vec_element(s, tcg_op1, rn, pass, MO_64);
+        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
+        widenfn(tcg_op2_wide, tcg_op2);
+        tcg_temp_free_i32(tcg_op2);
+        tcg_res[pass] = tcg_temp_new_i64();
+        gen_neon_addl(size, (opcode == 3),
+                      tcg_res[pass], tcg_op1, tcg_op2_wide);
+        tcg_temp_free_i64(tcg_op1);
+        tcg_temp_free_i64(tcg_op2_wide);
+    }
+
+    for (pass = 0; pass < 2; pass++) {
+        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+        tcg_temp_free_i64(tcg_res[pass]);
+    }
+}
+
 /* C3.6.15 AdvSIMD three different
  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -7147,7 +7182,11 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
         /* 64 x 128 -> 128 */
-        unsupported_encoding(s, insn);
+        if (size == 3) {
+            unallocated_encoding(s);
+            return;
+        }
+        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
         break;
     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */

From e4b998d47df70450f5aafc0fd7b3d56fa4920369 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:56 +0000
Subject: [PATCH 28/30] target-arm: A64: Implement narrowing three-reg-diff
 operations

Implement the narrowing three-reg-diff operations: ADDHN,
RADDHN, SUBHN and RSUBHN.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 60 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 511f15e588..cb630d8bed 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7152,6 +7152,60 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
     }
 }
 
+static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
+{
+    tcg_gen_shri_i64(in, in, 32);
+    tcg_gen_trunc_i64_i32(res, in);
+}
+
+static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
+{
+    tcg_gen_addi_i64(in, in, 1U << 31);
+    do_narrow_high_u32(res, in);
+}
+
+static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
+                                 int opcode, int rd, int rn, int rm)
+{
+    TCGv_i32 tcg_res[2];
+    int part = is_q ? 2 : 0;
+    int pass;
+
+    for (pass = 0; pass < 2; pass++) {
+        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
+        static NeonGenNarrowFn * const narrowfns[3][2] = {
+            { gen_helper_neon_narrow_high_u8,
+              gen_helper_neon_narrow_round_high_u8 },
+            { gen_helper_neon_narrow_high_u16,
+              gen_helper_neon_narrow_round_high_u16 },
+            { do_narrow_high_u32, do_narrow_round_high_u32 },
+        };
+        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
+
+        read_vec_element(s, tcg_op1, rn, pass, MO_64);
+        read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
+
+        tcg_temp_free_i64(tcg_op1);
+        tcg_temp_free_i64(tcg_op2);
+
+        tcg_res[pass] = tcg_temp_new_i32();
+        gennarrow(tcg_res[pass], tcg_wideres);
+        tcg_temp_free_i64(tcg_wideres);
+    }
+
+    for (pass = 0; pass < 2; pass++) {
+        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
+        tcg_temp_free_i32(tcg_res[pass]);
+    }
+    if (!is_q) {
+        clear_vec_high(s, rd);
+    }
+}
+
 /* C3.6.15 AdvSIMD three different
  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -7191,7 +7245,11 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
         /* 128 x 128 -> 64 */
-        unsupported_encoding(s, insn);
+        if (size == 3) {
+            unallocated_encoding(s);
+            return;
+        }
+        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
         break;
     case 14: /* PMULL, PMULL2 */
         if (is_u || size == 1 || size == 2) {

From 60510aed69f69bfd44bf6e58c571dbba8b1b43a0 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 20 Feb 2014 10:35:56 +0000
Subject: [PATCH 29/30] target-arm: A64: Implement unprivileged load/store

Implement the unprivileged load and store instructions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-arm/translate-a64.c | 69 ++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 32 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index cb630d8bed..8752e7e16c 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -618,20 +618,26 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
  */
 
 /*
- * Store from GPR register to memory
+ * Store from GPR register to memory.
  */
+static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
+                             TCGv_i64 tcg_addr, int size, int memidx)
+{
+    g_assert(size <= 3);
+    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
+}
+
 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                       TCGv_i64 tcg_addr, int size)
 {
-    g_assert(size <= 3);
-    tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size);
+    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
 }
 
 /*
  * Load from memory to GPR register
  */
-static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
-                      int size, bool is_signed, bool extend)
+static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+                             int size, bool is_signed, bool extend, int memidx)
 {
     TCGMemOp memop = MO_TE + size;
 
@@ -641,7 +647,7 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
         memop += MO_SIGN;
     }
 
-    tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop);
+    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 
     if (extend && is_signed) {
         g_assert(size < 3);
@@ -649,6 +655,13 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
     }
 }
 
+static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+                      int size, bool is_signed, bool extend)
+{
+    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
+                     get_mem_index(s));
+}
+
 /*
  * Store from FP register to memory
  */
@@ -1824,6 +1837,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
  * +----+-------+---+-----+-----+---+--------+-----+------+------+
  *
  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
+         10 -> unprivileged
  * V = 0 -> non-vector
  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
@@ -1839,6 +1853,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
     bool is_signed = false;
     bool is_store = false;
     bool is_extended = false;
+    bool is_unpriv = (idx == 2);
     bool is_vector = extract32(insn, 26, 1);
     bool post_index;
     bool writeback;
@@ -1847,7 +1862,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
 
     if (is_vector) {
         size |= (opc & 2) << 1;
-        if (size > 4) {
+        if (size > 4 || is_unpriv) {
             unallocated_encoding(s);
             return;
         }
@@ -1855,6 +1870,10 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
     } else {
         if (size == 3 && opc == 2) {
             /* PRFM - prefetch */
+            if (is_unpriv) {
+                unallocated_encoding(s);
+                return;
+            }
             return;
         }
         if (opc == 3 && size > 1) {
@@ -1868,6 +1887,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
 
     switch (idx) {
     case 0:
+    case 2:
         post_index = false;
         writeback = false;
         break;
@@ -1879,9 +1899,6 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
         post_index = false;
         writeback = true;
         break;
-    case 2:
-        g_assert(false);
-        break;
     }
 
     if (rn == 31) {
@@ -1901,10 +1918,13 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
         }
     } else {
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
+        int memidx = is_unpriv ? 1 : get_mem_index(s);
+
         if (is_store) {
-            do_gpr_st(s, tcg_rt, tcg_addr, size);
+            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
         } else {
-            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
+            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
+                             is_signed, is_extended, memidx);
         }
     }
 
@@ -2084,25 +2104,6 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Load/store register (immediate forms) */
-static void disas_ldst_reg_imm(DisasContext *s, uint32_t insn)
-{
-    switch (extract32(insn, 10, 2)) {
-    case 0: case 1: case 3:
-        /* Load/store register (unscaled immediate) */
-        /* Load/store immediate pre/post-indexed */
-        disas_ldst_reg_imm9(s, insn);
-        break;
-    case 2:
-        /* Load/store register unprivileged */
-        unsupported_encoding(s, insn);
-        break;
-    default:
-        unallocated_encoding(s);
-        break;
-    }
-}
-
 /* Load/store register (all forms) */
 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
 {
@@ -2111,7 +2112,11 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
             disas_ldst_reg_roffset(s, insn);
         } else {
-            disas_ldst_reg_imm(s, insn);
+            /* Load/store register (unscaled immediate)
+             * Load/store immediate pre/post-indexed
+             * Load/store register unprivileged
+             */
+            disas_ldst_reg_imm9(s, insn);
         }
         break;
     case 1:

From 2ea5a2ca1f1dc302652d2ad5035e0b209ccaa177 Mon Sep 17 00:00:00 2001
From: Janne Grunau <j@jannau.net>
Date: Thu, 20 Feb 2014 10:35:56 +0000
Subject: [PATCH 30/30] linux-user: AArch64: Fix exclusive store of the zero
 register

Signed-off-by: Janne Grunau <j@jannau.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 linux-user/main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index cabc9e1a0e..919297736c 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -953,7 +953,8 @@ static int do_strex_a64(CPUARMState *env)
             goto finish;
         }
     }
-    val = env->xregs[rt];
+    /* handle the zero register */
+    val = rt == 31 ? 0 : env->xregs[rt];
     switch (size) {
     case 0:
         segv = put_user_u8(val, addr);
@@ -972,7 +973,8 @@ static int do_strex_a64(CPUARMState *env)
         goto error;
     }
     if (is_pair) {
-        val = env->xregs[rt2];
+        /* handle the zero register */
+        val = rt2 == 31 ? 0 : env->xregs[rt2];
         if (size == 2) {
             segv = put_user_u32(val, addr + 4);
         } else {