diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 12fcefe0c6..2242eee514 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -62,6 +62,25 @@ typedef struct ARMCPU {
 
     /* Coprocessor information */
     GHashTable *cp_regs;
+    /* For marshalling (mostly coprocessor) register state between the
+     * kernel and QEMU (for KVM) and between two QEMUs (for migration),
+     * we use these arrays.
+     */
+    /* List of register indexes managed via these arrays; (full KVM style
+     * 64 bit indexes, not CPRegInfo 32 bit indexes)
+     */
+    uint64_t *cpreg_indexes;
+    /* Values of the registers (cpreg_indexes[i]'s value is cpreg_values[i]) */
+    uint64_t *cpreg_values;
+    /* Length of the indexes, values arrays */
+    int32_t cpreg_array_len;
+    /* These are used only for migration: incoming data arrives in
+     * these fields and is sanity checked in post_load before copying
+     * to the working data structures above.
+     */
+    uint64_t *cpreg_vmstate_indexes;
+    uint64_t *cpreg_vmstate_values;
+    int32_t cpreg_vmstate_array_len;
 
     /* The instance init functions for implementation-specific subclasses
      * set these fields to specify the implementation-dependent values of
@@ -116,6 +135,7 @@ extern const struct VMStateDescription vmstate_arm_cpu;
 #endif
 
 void register_cp_regs_for_features(ARMCPU *cpu);
+void init_cpreg_list(ARMCPU *cpu);
 
 void arm_cpu_do_interrupt(CPUState *cpu);
 void arm_v7m_cpu_do_interrupt(CPUState *cpu);
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 496a59f5c0..241f032f50 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -204,6 +204,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     register_cp_regs_for_features(cpu);
     arm_cpu_register_gdb_regs_for_features(cpu);
 
+    init_cpreg_list(cpu);
+
     cpu_reset(CPU(cpu));
     qemu_init_vcpu(env);
 
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 1d8eba502a..abcc0b4b1c 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -424,6 +424,43 @@ void armv7m_nvic_complete_irq(void *opaque, int irq);
     (((cp) << 16) | ((is64) << 15) | ((crn) << 11) |    \
      ((crm) << 7) | ((opc1) << 3) | (opc2))
 
+/* Note that these must line up with the KVM/ARM register
+ * ID field definitions (kvm.c will check this, but we
+ * can't just use the KVM defines here as the kvm headers
+ * are unavailable to non-KVM-specific files)
+ */
+#define CP_REG_SIZE_SHIFT 52
+#define CP_REG_SIZE_MASK       0x00f0000000000000ULL
+#define CP_REG_SIZE_U32        0x0020000000000000ULL
+#define CP_REG_SIZE_U64        0x0030000000000000ULL
+#define CP_REG_ARM             0x4000000000000000ULL
+
+/* Convert a full 64 bit KVM register ID to the truncated 32 bit
+ * version used as a key for the coprocessor register hashtable
+ */
+static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
+{
+    uint32_t cpregid = kvmid;
+    if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
+        cpregid |= (1 << 15);
+    }
+    return cpregid;
+}
+
+/* Convert a truncated 32 bit hashtable key into the full
+ * 64 bit KVM register ID.
+ */
+static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
+{
+    uint64_t kvmid = cpregid & ~(1 << 15);
+    if (cpregid & (1 << 15)) {
+        kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
+    } else {
+        kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
+    }
+    return kvmid;
+}
+
 /* ARMCPRegInfo type field bits. If the SPECIAL bit is set this is a
  * special-behaviour cp reg and bits [15..8] indicate what behaviour
  * it has. Otherwise it is a simple cp reg, where CONST indicates that
@@ -621,6 +658,38 @@ static inline bool cp_access_ok(CPUARMState *env,
     return (ri->access >> ((arm_current_pl(env) * 2) + isread)) & 1;
 }
 
+/**
+ * write_list_to_cpustate
+ * @cpu: ARMCPU
+ *
+ * For each register listed in the ARMCPU cpreg_indexes list, write
+ * its value from the cpreg_values list into the ARMCPUState structure.
+ * This updates TCG's working data structures from KVM data or
+ * from incoming migration state.
+ *
+ * Returns: true if all register values were updated correctly,
+ * false if some register was unknown or could not be written.
+ * Note that we do not stop early on failure -- we will attempt
+ * writing all registers in the list.
+ */
+bool write_list_to_cpustate(ARMCPU *cpu);
+
+/**
+ * write_cpustate_to_list:
+ * @cpu: ARMCPU
+ *
+ * For each register listed in the ARMCPU cpreg_indexes list, write
+ * its value from the ARMCPUState structure into the cpreg_values list.
+ * This is used to copy info from TCG's working data structures into
+ * KVM or for outbound migration.
+ *
+ * Returns: true if all register values were read correctly,
+ * false if some register was unknown or could not be read.
+ * Note that we do not stop early on failure -- we will attempt
+ * reading all registers in the list.
+ */
+bool write_cpustate_to_list(ARMCPU *cpu);
+
 /* Does the core conform to the the "MicroController" profile. e.g. Cortex-M3.
    Note the M in older cores (eg. ARM7TDMI) stands for Multiply. These are
    conventional cores (ie. Application or Realtime profile).  */
diff --git a/target-arm/helper.c b/target-arm/helper.c
index baf75764e6..5f639fdeb8 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -78,6 +78,180 @@ static int raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
     return 0;
 }
 
+static bool read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t *v)
+{
+    /* Raw read of a coprocessor register (as needed for migration, etc)
+     * return true on success, false if the read is impossible for some reason.
+     */
+    if (ri->type & ARM_CP_CONST) {
+        *v = ri->resetvalue;
+    } else if (ri->raw_readfn) {
+        return (ri->raw_readfn(env, ri, v) == 0);
+    } else if (ri->readfn) {
+        return (ri->readfn(env, ri, v) == 0);
+    } else {
+        if (ri->type & ARM_CP_64BIT) {
+            *v = CPREG_FIELD64(env, ri);
+        } else {
+            *v = CPREG_FIELD32(env, ri);
+        }
+    }
+    return true;
+}
+
+static bool write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
+                             int64_t v)
+{
+    /* Raw write of a coprocessor register (as needed for migration, etc).
+     * Return true on success, false if the write is impossible for some reason.
+     * Note that constant registers are treated as write-ignored; the
+     * caller should check for success by whether a readback gives the
+     * value written.
+     */
+    if (ri->type & ARM_CP_CONST) {
+        return true;
+    } else if (ri->raw_writefn) {
+        return (ri->raw_writefn(env, ri, v) == 0);
+    } else if (ri->writefn) {
+        return (ri->writefn(env, ri, v) == 0);
+    } else {
+        if (ri->type & ARM_CP_64BIT) {
+            CPREG_FIELD64(env, ri) = v;
+        } else {
+            CPREG_FIELD32(env, ri) = v;
+        }
+    }
+    return true;
+}
+
+bool write_cpustate_to_list(ARMCPU *cpu)
+{
+    /* Write the coprocessor state from cpu->env to the (index,value) list. */
+    int i;
+    bool ok = true;
+
+    for (i = 0; i < cpu->cpreg_array_len; i++) {
+        uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
+        const ARMCPRegInfo *ri;
+        uint64_t v;
+        ri = get_arm_cp_reginfo(cpu, regidx);
+        if (!ri) {
+            ok = false;
+            continue;
+        }
+        if (ri->type & ARM_CP_NO_MIGRATE) {
+            continue;
+        }
+        if (!read_raw_cp_reg(&cpu->env, ri, &v)) {
+            ok = false;
+            continue;
+        }
+        cpu->cpreg_values[i] = v;
+    }
+    return ok;
+}
+
+bool write_list_to_cpustate(ARMCPU *cpu)
+{
+    int i;
+    bool ok = true;
+
+    for (i = 0; i < cpu->cpreg_array_len; i++) {
+        uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
+        uint64_t v = cpu->cpreg_values[i];
+        uint64_t readback;
+        const ARMCPRegInfo *ri;
+
+        ri = get_arm_cp_reginfo(cpu, regidx);
+        if (!ri) {
+            ok = false;
+            continue;
+        }
+        if (ri->type & ARM_CP_NO_MIGRATE) {
+            continue;
+        }
+        /* Write value and confirm it reads back as written
+         * (to catch read-only registers and partially read-only
+         * registers where the incoming migration value doesn't match)
+         */
+        if (!write_raw_cp_reg(&cpu->env, ri, v) ||
+            !read_raw_cp_reg(&cpu->env, ri, &readback) ||
+            readback != v) {
+            ok = false;
+        }
+    }
+    return ok;
+}
+
+static void add_cpreg_to_list(gpointer key, gpointer opaque)
+{
+    ARMCPU *cpu = opaque;
+    uint64_t regidx;
+    const ARMCPRegInfo *ri;
+
+    regidx = *(uint32_t *)key;
+    ri = get_arm_cp_reginfo(cpu, regidx);
+
+    if (!(ri->type & ARM_CP_NO_MIGRATE)) {
+        cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx);
+        /* The value array need not be initialized at this point */
+        cpu->cpreg_array_len++;
+    }
+}
+
+static void count_cpreg(gpointer key, gpointer opaque)
+{
+    ARMCPU *cpu = opaque;
+    uint64_t regidx;
+    const ARMCPRegInfo *ri;
+
+    regidx = *(uint32_t *)key;
+    ri = get_arm_cp_reginfo(cpu, regidx);
+
+    if (!(ri->type & ARM_CP_NO_MIGRATE)) {
+        cpu->cpreg_array_len++;
+    }
+}
+
+static gint cpreg_key_compare(gconstpointer a, gconstpointer b)
+{
+    uint32_t aidx = *(uint32_t *)a;
+    uint32_t bidx = *(uint32_t *)b;
+
+    return aidx - bidx;
+}
+
+void init_cpreg_list(ARMCPU *cpu)
+{
+    /* Initialise the cpreg_tuples[] array based on the cp_regs hash.
+     * Note that we require cpreg_tuples[] to be sorted by key ID.
+     */
+    GList *keys;
+    int arraylen;
+
+    keys = g_hash_table_get_keys(cpu->cp_regs);
+    keys = g_list_sort(keys, cpreg_key_compare);
+
+    cpu->cpreg_array_len = 0;
+
+    g_list_foreach(keys, count_cpreg, cpu);
+
+    arraylen = cpu->cpreg_array_len;
+    cpu->cpreg_indexes = g_new(uint64_t, arraylen);
+    cpu->cpreg_values = g_new(uint64_t, arraylen);
+    cpu->cpreg_vmstate_indexes = g_new(uint64_t, arraylen);
+    cpu->cpreg_vmstate_values = g_new(uint64_t, arraylen);
+    cpu->cpreg_vmstate_array_len = cpu->cpreg_array_len;
+    cpu->cpreg_array_len = 0;
+
+    g_list_foreach(keys, add_cpreg_to_list, cpu);
+
+    assert(cpu->cpreg_array_len == arraylen);
+
+    g_list_free(keys);
+}
+
 static int dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     env->cp15.c3 = value;
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 27dcab93f6..f427537c5f 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -23,6 +23,15 @@
 #include "cpu.h"
 #include "hw/arm/arm.h"
 
+/* Check that cpu.h's idea of coprocessor fields matches KVM's */
+#if (CP_REG_SIZE_SHIFT != KVM_REG_SIZE_SHIFT) || \
+    (CP_REG_SIZE_MASK != KVM_REG_SIZE_MASK) ||   \
+    (CP_REG_SIZE_U32 != KVM_REG_SIZE_U32) || \
+    (CP_REG_SIZE_U64 != KVM_REG_SIZE_U64) || \
+    (CP_REG_ARM != KVM_REG_ARM)
+#error mismatch between cpu.h and KVM header definitions
+#endif
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 4dd057c488..076dc1672d 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -148,11 +148,65 @@ static const VMStateInfo vmstate_cpsr = {
     .put = put_cpsr,
 };
 
+static void cpu_pre_save(void *opaque)
+{
+    ARMCPU *cpu = opaque;
+
+    if (!write_cpustate_to_list(cpu)) {
+        /* This should never fail. */
+        abort();
+    }
+
+    cpu->cpreg_vmstate_array_len = cpu->cpreg_array_len;
+    memcpy(cpu->cpreg_vmstate_indexes, cpu->cpreg_indexes,
+           cpu->cpreg_array_len * sizeof(uint64_t));
+    memcpy(cpu->cpreg_vmstate_values, cpu->cpreg_values,
+           cpu->cpreg_array_len * sizeof(uint64_t));
+}
+
+static int cpu_post_load(void *opaque, int version_id)
+{
+    ARMCPU *cpu = opaque;
+    int i, v;
+
+    /* Update the values list from the incoming migration data.
+     * Anything in the incoming data which we don't know about is
+     * a migration failure; anything we know about but the incoming
+     * data doesn't specify retains its current (reset) value.
+     * The indexes list remains untouched -- we only inspect the
+     * incoming migration index list so we can match the values array
+     * entries with the right slots in our own values array.
+     */
+
+    for (i = 0, v = 0; i < cpu->cpreg_array_len
+             && v < cpu->cpreg_vmstate_array_len; i++) {
+        if (cpu->cpreg_vmstate_indexes[v] > cpu->cpreg_indexes[i]) {
+            /* register in our list but not incoming : skip it */
+            continue;
+        }
+        if (cpu->cpreg_vmstate_indexes[v] < cpu->cpreg_indexes[i]) {
+            /* register in their list but not ours: fail migration */
+            return -1;
+        }
+        /* matching register, copy the value over */
+        cpu->cpreg_values[i] = cpu->cpreg_vmstate_values[v];
+        v++;
+    }
+
+    if (!write_list_to_cpustate(cpu)) {
+        return -1;
+    }
+
+    return 0;
+}
+
 const VMStateDescription vmstate_arm_cpu = {
     .name = "cpu",
-    .version_id = 11,
-    .minimum_version_id = 11,
-    .minimum_version_id_old = 11,
+    .version_id = 12,
+    .minimum_version_id = 12,
+    .minimum_version_id_old = 12,
+    .pre_save = cpu_pre_save,
+    .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(env.regs, ARMCPU, 16),
         {
@@ -169,50 +223,16 @@ const VMStateDescription vmstate_arm_cpu = {
         VMSTATE_UINT32_ARRAY(env.banked_r14, ARMCPU, 6),
         VMSTATE_UINT32_ARRAY(env.usr_regs, ARMCPU, 5),
         VMSTATE_UINT32_ARRAY(env.fiq_regs, ARMCPU, 5),
-        VMSTATE_UINT32(env.cp15.c0_cpuid, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c0_cssel, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c1_sys, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c1_coproc, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c1_xscaleauxcr, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c1_scr, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c2_base0, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c2_base0_hi, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c2_base1, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c2_base1_hi, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c2_control, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c2_mask, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c2_base_mask, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c2_data, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c2_insn, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c3, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c5_insn, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c5_data, ARMCPU),
-        VMSTATE_UINT32_ARRAY(env.cp15.c6_region, ARMCPU, 8),
-        VMSTATE_UINT32(env.cp15.c6_insn, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c6_data, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c7_par, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c7_par_hi, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c9_insn, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c9_data, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c9_pmcr, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c9_pmcnten, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c9_pmovsr, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c9_pmxevtyper, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c9_pmuserenr, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c9_pminten, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c13_fcse, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c13_context, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c13_tls1, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c13_tls2, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c13_tls3, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c15_cpar, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c15_ticonfig, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c15_i_max, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c15_i_min, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c15_threadid, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c15_power_control, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c15_diagnostic, ARMCPU),
-        VMSTATE_UINT32(env.cp15.c15_power_diagnostic, ARMCPU),
+        /* The length-check must come before the arrays to avoid
+         * incoming data possibly overflowing the array.
+         */
+        VMSTATE_INT32_LE(cpreg_vmstate_array_len, ARMCPU),
+        VMSTATE_VARRAY_INT32(cpreg_vmstate_indexes, ARMCPU,
+                             cpreg_vmstate_array_len,
+                             0, vmstate_info_uint64, uint64_t),
+        VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU,
+                             cpreg_vmstate_array_len,
+                             0, vmstate_info_uint64, uint64_t),
         VMSTATE_UINT32(env.exclusive_addr, ARMCPU),
         VMSTATE_UINT32(env.exclusive_val, ARMCPU),
         VMSTATE_UINT32(env.exclusive_high, ARMCPU),