diff --git a/.travis.yml b/.travis.yml
index ef63fc16b3..678e33decc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -135,7 +135,7 @@ matrix:
     # TCG debug can be run just on its own and is mostly agnostic to user/softmmu distinctions
     - env:
         - CONFIG="--enable-debug-tcg --disable-system"
-        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-debug"
+        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-debug-tcg"
 
 
     - env:
@@ -336,14 +336,29 @@ matrix:
     - env:
         - CONFIG="--disable-system --enable-debug-tcg"
         - TEST_CMD="make -j3 check-tcg V=1"
-        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-default"
+        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-debug-tcg"
+
+
+    # Run check-tcg against linux-user (with plugins)
+    # we skip sparc64-linux-user until it has been fixed somewhat
+    - env:
+        - CONFIG="--disable-system --enable-plugins --enable-debug-tcg --target-list-exclude=sparc64-linux-user"
+        - TEST_CMD="make -j3 check-tcg V=1"
+        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-debug-tcg"
 
 
     # Run check-tcg against softmmu targets
     - env:
         - CONFIG="--enable-debug-tcg --target-list=xtensa-softmmu,arm-softmmu,aarch64-softmmu,alpha-softmmu"
         - TEST_CMD="make -j3 check-tcg V=1"
-        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-default"
+        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-debug-tcg"
+
+
+    # Run check-tcg against softmmu targets (with plugins)
+    - env:
+        - CONFIG="--enable-plugins --enable-debug-tcg --target-list=xtensa-softmmu,arm-softmmu,aarch64-softmmu,alpha-softmmu"
+        - TEST_CMD="make -j3 check-tcg V=1"
+        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-debug-tcg"
 
 
     # Release builds
diff --git a/MAINTAINERS b/MAINTAINERS
index 40a69a6a1b..92961faa0e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2360,6 +2360,12 @@ M: Richard Henderson <rth@twiddle.net>
 S: Maintained
 F: tcg/
 
+TCG Plugins
+M: Alex Bennée <alex.bennee@linaro.org>
+S: Maintained
+F: plugins/
+F: tests/plugin
+
 AArch64 TCG target
 M: Claudio Fontana <claudio.fontana@huawei.com>
 M: Claudio Fontana <claudio.fontana@gmail.com>
diff --git a/Makefile b/Makefile
index 0e994a275d..bd6376d295 100644
--- a/Makefile
+++ b/Makefile
@@ -74,6 +74,12 @@ CONFIG_ALL=y
 config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios $(SRC_PATH)/VERSION
 	@echo $@ is out-of-date, running configure
 	@./config.status
+
+# Force configure to re-run if the API symbols are updated
+ifeq ($(CONFIG_PLUGIN),y)
+config-host.mak: $(SRC_PATH)/plugins/qemu-plugins.symbols
+endif
+
 else
 config-host.mak:
 ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
@@ -737,6 +743,7 @@ distclean: clean
 	rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
 	rm -f qemu-doc.log qemu-doc.pdf qemu-doc.pg qemu-doc.toc qemu-doc.tp
 	rm -f qemu-doc.vr qemu-doc.txt
+	rm -f qemu-plugins-ld.symbols qemu-plugins-ld64.symbols
 	rm -f config.log
 	rm -f linux-headers/asm
 	rm -f docs/version.texi
@@ -853,7 +860,11 @@ endif
 
 ICON_SIZES=16x16 24x24 32x32 48x48 64x64 128x128 256x256 512x512
 
-install: all $(if $(BUILD_DOCS),install-doc) install-datadir install-localstatedir \
+install-includedir:
+	$(INSTALL_DIR) "$(DESTDIR)$(includedir)"
+
+install: all $(if $(BUILD_DOCS),install-doc) \
+	install-datadir install-localstatedir install-includedir \
 	$(if $(INSTALL_BLOBS),$(edk2-decompressed)) \
 	recurse-install
 ifneq ($(TOOLS),)
@@ -915,6 +926,9 @@ endif
 		"$(DESTDIR)$(qemu_desktopdir)/qemu.desktop"
 ifdef CONFIG_GTK
 	$(MAKE) -C po $@
+endif
+ifeq ($(CONFIG_PLUGIN),y)
+	$(INSTALL_DATA) $(SRC_PATH)/include/qemu/qemu-plugin.h "$(DESTDIR)$(includedir)/qemu-plugin.h"
 endif
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/keymaps"
 	set -e; for x in $(KEYMAPS); do \
diff --git a/Makefile.target b/Makefile.target
index ca3d14efe1..24d79d26eb 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -119,6 +119,8 @@ obj-y += disas.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
 LIBS := $(libs_cpu) $(LIBS)
 
+obj-$(CONFIG_PLUGIN) += plugins/
+
 #########################################################
 # Linux user emulator target
 
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
index e2d23edafe..677191a69c 100644
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -11,7 +11,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "cpu.h"
 #include "tcg/tcg.h"
 #include "exec/exec-all.h"
diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
index d381a02f34..a92f2c454b 100644
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -6,3 +6,4 @@ obj-y += translator.o
 
 obj-$(CONFIG_USER_ONLY) += user-exec.o
 obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
+obj-$(CONFIG_PLUGIN) += plugin-gen.o
diff --git a/accel/tcg/atomic_common.inc.c b/accel/tcg/atomic_common.inc.c
new file mode 100644
index 0000000000..344525b0bb
--- /dev/null
+++ b/accel/tcg/atomic_common.inc.c
@@ -0,0 +1,54 @@
+/*
+ * Common Atomic Helper Functions
+ *
+ * This file should be included before the various instantiations of
+ * the atomic_template.h helpers.
+ *
+ * Copyright (c) 2019 Linaro
+ * Written by Alex Bennée <alex.bennee@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+static inline
+void atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr, uint16_t info)
+{
+    CPUState *cpu = env_cpu(env);
+
+    trace_guest_mem_before_exec(cpu, addr, info);
+    trace_guest_mem_before_exec(cpu, addr, info | TRACE_MEM_ST);
+}
+
+static inline void
+atomic_trace_rmw_post(CPUArchState *env, target_ulong addr, uint16_t info)
+{
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info);
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info | TRACE_MEM_ST);
+}
+
+static inline
+void atomic_trace_ld_pre(CPUArchState *env, target_ulong addr, uint16_t info)
+{
+    trace_guest_mem_before_exec(env_cpu(env), addr, info);
+}
+
+static inline
+void atomic_trace_ld_post(CPUArchState *env, target_ulong addr, uint16_t info)
+{
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info);
+}
+
+static inline
+void atomic_trace_st_pre(CPUArchState *env, target_ulong addr, uint16_t info)
+{
+    trace_guest_mem_before_exec(env_cpu(env), addr, info);
+}
+
+static inline
+void atomic_trace_st_post(CPUArchState *env, target_ulong addr, uint16_t info)
+{
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info);
+}
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index 287433d809..837676231f 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -18,6 +18,7 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "qemu/plugin.h"
 #include "trace/mem.h"
 
 #if DATA_SIZE == 16
@@ -59,26 +60,6 @@
 # define ABI_TYPE  uint32_t
 #endif
 
-#define ATOMIC_TRACE_RMW do {                                           \
-        uint8_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false); \
-                                                                        \
-        trace_guest_mem_before_exec(env_cpu(env), addr, info);      \
-        trace_guest_mem_before_exec(env_cpu(env), addr,             \
-                                    info | TRACE_MEM_ST);               \
-    } while (0)
-
-#define ATOMIC_TRACE_LD do {                                            \
-        uint8_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false); \
-                                                                        \
-        trace_guest_mem_before_exec(env_cpu(env), addr, info);      \
-    } while (0)
-
-# define ATOMIC_TRACE_ST do {                                           \
-        uint8_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, true); \
-                                                                        \
-        trace_guest_mem_before_exec(env_cpu(env), addr, info);      \
-    } while (0)
-
 /* Define host-endian atomic operations.  Note that END is used within
    the ATOMIC_NAME macro, and redefined below.  */
 #if DATA_SIZE == 1
@@ -98,14 +79,17 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
     ATOMIC_MMU_DECLS;
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
     DATA_TYPE ret;
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false,
+                                                           ATOMIC_MMU_IDX);
 
-    ATOMIC_TRACE_RMW;
+    atomic_trace_rmw_pre(env, addr, info);
 #if DATA_SIZE == 16
     ret = atomic16_cmpxchg(haddr, cmpv, newv);
 #else
     ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
 #endif
     ATOMIC_MMU_CLEANUP;
+    atomic_trace_rmw_post(env, addr, info);
     return ret;
 }
 
@@ -115,10 +99,13 @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false,
+                                                           ATOMIC_MMU_IDX);
 
-    ATOMIC_TRACE_LD;
+    atomic_trace_ld_pre(env, addr, info);
     val = atomic16_read(haddr);
     ATOMIC_MMU_CLEANUP;
+    atomic_trace_ld_post(env, addr, info);
     return val;
 }
 
@@ -127,10 +114,13 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, true,
+                                                          ATOMIC_MMU_IDX);
 
-    ATOMIC_TRACE_ST;
+    atomic_trace_st_pre(env, addr, info);
     atomic16_set(haddr, val);
     ATOMIC_MMU_CLEANUP;
+    atomic_trace_st_post(env, addr, info);
 }
 #endif
 #else
@@ -140,10 +130,13 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
     ATOMIC_MMU_DECLS;
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
     DATA_TYPE ret;
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false,
+                                                          ATOMIC_MMU_IDX);
 
-    ATOMIC_TRACE_RMW;
+    atomic_trace_rmw_pre(env, addr, info);
     ret = atomic_xchg__nocheck(haddr, val);
     ATOMIC_MMU_CLEANUP;
+    atomic_trace_rmw_post(env, addr, info);
     return ret;
 }
 
@@ -154,10 +147,14 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
     ATOMIC_MMU_DECLS;                                               \
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
     DATA_TYPE ret;                                                  \
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT,   \
+                                                           false,   \
+                                                           ATOMIC_MMU_IDX); \
                                                                     \
-    ATOMIC_TRACE_RMW;                                               \
+    atomic_trace_rmw_pre(env, addr, info);                          \
     ret = atomic_##X(haddr, val);                                   \
     ATOMIC_MMU_CLEANUP;                                             \
+    atomic_trace_rmw_post(env, addr, info);                         \
     return ret;                                                     \
 }
 
@@ -186,8 +183,11 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
     ATOMIC_MMU_DECLS;                                               \
     XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
     XDATA_TYPE cmp, old, new, val = xval;                           \
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT,   \
+                                                           false,   \
+                                                           ATOMIC_MMU_IDX); \
                                                                     \
-    ATOMIC_TRACE_RMW;                                               \
+    atomic_trace_rmw_pre(env, addr, info);                          \
     smp_mb();                                                       \
     cmp = atomic_read__nocheck(haddr);                              \
     do {                                                            \
@@ -195,6 +195,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
         cmp = atomic_cmpxchg__nocheck(haddr, old, new);             \
     } while (cmp != old);                                           \
     ATOMIC_MMU_CLEANUP;                                             \
+    atomic_trace_rmw_post(env, addr, info);                         \
     return RET;                                                     \
 }
 
@@ -232,14 +233,18 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
     ATOMIC_MMU_DECLS;
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
     DATA_TYPE ret;
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT,
+                                                           false,
+                                                           ATOMIC_MMU_IDX);
 
-    ATOMIC_TRACE_RMW;
+    atomic_trace_rmw_pre(env, addr, info);
 #if DATA_SIZE == 16
     ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
 #else
     ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
 #endif
     ATOMIC_MMU_CLEANUP;
+    atomic_trace_rmw_post(env, addr, info);
     return BSWAP(ret);
 }
 
@@ -249,10 +254,14 @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT,
+                                                           false,
+                                                           ATOMIC_MMU_IDX);
 
-    ATOMIC_TRACE_LD;
+    atomic_trace_ld_pre(env, addr, info);
     val = atomic16_read(haddr);
     ATOMIC_MMU_CLEANUP;
+    atomic_trace_ld_post(env, addr, info);
     return BSWAP(val);
 }
 
@@ -261,11 +270,16 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT,
+                                                           true,
+                                                           ATOMIC_MMU_IDX);
 
-    ATOMIC_TRACE_ST;
+    val = BSWAP(val);
+    atomic_trace_st_pre(env, addr, info);
     val = BSWAP(val);
     atomic16_set(haddr, val);
     ATOMIC_MMU_CLEANUP;
+    atomic_trace_st_post(env, addr, info);
 }
 #endif
 #else
@@ -275,10 +289,14 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
     ATOMIC_MMU_DECLS;
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
     ABI_TYPE ret;
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT,
+                                                           false,
+                                                           ATOMIC_MMU_IDX);
 
-    ATOMIC_TRACE_RMW;
+    atomic_trace_rmw_pre(env, addr, info);
     ret = atomic_xchg__nocheck(haddr, BSWAP(val));
     ATOMIC_MMU_CLEANUP;
+    atomic_trace_rmw_post(env, addr, info);
     return BSWAP(ret);
 }
 
@@ -289,10 +307,14 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
     ATOMIC_MMU_DECLS;                                               \
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
     DATA_TYPE ret;                                                  \
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT,   \
+                                                           false,   \
+                                                           ATOMIC_MMU_IDX); \
                                                                     \
-    ATOMIC_TRACE_RMW;                                               \
+    atomic_trace_rmw_pre(env, addr, info);                          \
     ret = atomic_##X(haddr, BSWAP(val));                            \
     ATOMIC_MMU_CLEANUP;                                             \
+    atomic_trace_rmw_post(env, addr, info);                         \
     return BSWAP(ret);                                              \
 }
 
@@ -319,8 +341,11 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
     ATOMIC_MMU_DECLS;                                               \
     XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
     XDATA_TYPE ldo, ldn, old, new, val = xval;                      \
+    uint16_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT,   \
+                                                           false,   \
+                                                           ATOMIC_MMU_IDX); \
                                                                     \
-    ATOMIC_TRACE_RMW;                                               \
+    atomic_trace_rmw_pre(env, addr, info);                          \
     smp_mb();                                                       \
     ldn = atomic_read__nocheck(haddr);                              \
     do {                                                            \
@@ -328,6 +353,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
         ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));      \
     } while (ldo != ldn);                                           \
     ATOMIC_MMU_CLEANUP;                                             \
+    atomic_trace_rmw_post(env, addr, info);                         \
     return RET;                                                     \
 }
 
@@ -355,10 +381,6 @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
 #undef MEND
 #endif /* DATA_SIZE > 1 */
 
-#undef ATOMIC_TRACE_ST
-#undef ATOMIC_TRACE_LD
-#undef ATOMIC_TRACE_RMW
-
 #undef BSWAP
 #undef ABI_TYPE
 #undef DATA_TYPE
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 48272c781b..c01f59c743 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -238,8 +238,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
     uint32_t flags;
     uint32_t cflags = 1;
     uint32_t cf_mask = cflags & CF_HASH_MASK;
-    /* volatile because we modify it between setjmp and longjmp */
-    volatile bool in_exclusive_region = false;
 
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
         tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
@@ -253,7 +251,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
 
         /* Since we got here, we know that parallel_cpus must be true.  */
         parallel_cpus = false;
-        in_exclusive_region = true;
         cc->cpu_exec_enter(cpu);
         /* execute the generated code */
         trace_exec_tb(tb, pc);
@@ -271,9 +268,10 @@ void cpu_exec_step_atomic(CPUState *cpu)
             qemu_mutex_unlock_iothread();
         }
         assert_no_pages_locked();
+        qemu_plugin_disable_mem_helpers(cpu);
     }
 
-    if (in_exclusive_region) {
+    if (cpu_in_exclusive_context(cpu)) {
         /* We might longjump out of either the codegen or the
          * execution, so must make sure we only end the exclusive
          * region if we started it.
@@ -704,6 +702,8 @@ int cpu_exec(CPUState *cpu)
         if (qemu_mutex_iothread_locked()) {
             qemu_mutex_unlock_iothread();
         }
+        qemu_plugin_disable_mem_helpers(cpu);
+
         assert_no_pages_locked();
     }
 
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 5eebddcca8..68487dceb5 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -34,6 +34,9 @@
 #include "qemu/atomic.h"
 #include "qemu/atomic128.h"
 #include "translate-all.h"
+#ifdef CONFIG_PLUGIN
+#include "qemu/plugin-memory.h"
+#endif
 
 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
 /* #define DEBUG_TLB */
@@ -1051,7 +1054,8 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
  * NOTE: This function will trigger an exception if the page is
  * not executable.
  */
-tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+                                        void **hostp)
 {
     uintptr_t mmu_idx = cpu_mmu_index(env, true);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
@@ -1077,13 +1081,24 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
 
     if (unlikely(entry->addr_code & TLB_MMIO)) {
         /* The region is not backed by RAM.  */
+        if (hostp) {
+            *hostp = NULL;
+        }
         return -1;
     }
 
     p = (void *)((uintptr_t)addr + entry->addend);
+    if (hostp) {
+        *hostp = p;
+    }
     return qemu_ram_addr_from_host_nofail(p);
 }
 
+tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
+{
+    return get_page_addr_code_hostp(env, addr, NULL);
+}
+
 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
 {
@@ -1235,6 +1250,45 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
     return (void *)((uintptr_t)addr + entry->addend);
 }
 
+
+#ifdef CONFIG_PLUGIN
+/*
+ * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
+ * This should be a hot path as we will have just looked this path up
+ * in the softmmu lookup code (or helper). We don't handle re-fills or
+ * checking the victim table. This is purely informational.
+ *
+ * This should never fail as the memory access being instrumented
+ * should have just filled the TLB.
+ */
+
+bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
+                       bool is_store, struct qemu_plugin_hwaddr *data)
+{
+    CPUArchState *env = cpu->env_ptr;
+    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
+    uintptr_t index = tlb_index(env, mmu_idx, addr);
+    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
+
+    if (likely(tlb_hit(tlb_addr, addr))) {
+        /* We must have an iotlb entry for MMIO */
+        if (tlb_addr & TLB_MMIO) {
+            CPUIOTLBEntry *iotlbentry;
+            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+            data->is_io = true;
+            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+        } else {
+            data->is_io = false;
+            data->v.ram.hostaddr = addr + tlbe->addend;
+        }
+        return true;
+    }
+    return false;
+}
+
+#endif
+
 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
  * operations, or io operations to proceed.  Return the host address.  */
 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
@@ -1811,6 +1865,9 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
 #define ATOMIC_MMU_DECLS
 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
 #define ATOMIC_MMU_CLEANUP
+#define ATOMIC_MMU_IDX   get_mmuidx(oi)
+
+#include "atomic_common.inc.c"
 
 #define DATA_SIZE 1
 #include "atomic_template.h"
@@ -1853,6 +1910,7 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
 #define DATA_SIZE 8
 #include "atomic_template.h"
 #endif
+#undef ATOMIC_MMU_IDX
 
 /* Code access functions.  */
 
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
new file mode 100644
index 0000000000..51580d51a0
--- /dev/null
+++ b/accel/tcg/plugin-gen.c
@@ -0,0 +1,932 @@
+/*
+ * plugin-gen.c - TCG-related bits of plugin infrastructure
+ *
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * We support instrumentation at an instruction granularity. That is,
+ * if a plugin wants to instrument the memory accesses performed by a
+ * particular instruction, it can just do that instead of instrumenting
+ * all memory accesses. Thus, in order to do this we first have to
+ * translate a TB, so that plugins can decide what/where to instrument.
+ *
+ * Injecting the desired instrumentation could be done with a second
+ * translation pass that combined the instrumentation requests, but that
+ * would be ugly and inefficient since we would decode the guest code twice.
+ * Instead, during TB translation we add "empty" instrumentation calls for all
+ * possible instrumentation events, and then once we collect the instrumentation
+ * requests from plugins, we either "fill in" those empty events or remove them
+ * if they have no requests.
+ *
+ * When "filling in" an event we first copy the empty callback's TCG ops. This
+ * might seem unnecessary, but it is done to support an arbitrary number
+ * of callbacks per event. Take for example a regular instruction callback.
+ * We first generate a callback to an empty helper function. Then, if two
+ * plugins register one callback each for this instruction, we make two copies
+ * of the TCG ops generated for the empty callback, substituting the function
+ * pointer that points to the empty helper function with the plugins' desired
+ * callback functions. After that we remove the empty callback's ops.
+ *
+ * Note that the location in TCGOp.args[] of the pointer to a helper function
+ * varies across different guest and host architectures. Instead of duplicating
+ * the logic that figures this out, we rely on the fact that the empty
+ * callbacks point to empty functions that are unique pointers in the program.
+ * Thus, to find the right location we just have to look for a match in
+ * TCGOp.args[]. This is the main reason why we first copy an empty callback's
+ * TCG ops and then fill them in; regardless of whether we have one or many
+ * callbacks for that event, the logic to add all of them is the same.
+ *
+ * When generating more than one callback per event, we make a small
+ * optimization to avoid generating redundant operations. For instance, for the
+ * second and all subsequent callbacks of an event, we do not need to reload the
+ * CPU's index into a TCG temp, since the first callback did it already.
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "tcg/tcg.h"
+#include "tcg/tcg-op.h"
+#include "trace/mem.h"
+#include "exec/exec-all.h"
+#include "exec/plugin-gen.h"
+#include "exec/translator.h"
+
+#ifdef CONFIG_SOFTMMU
+# define CONFIG_SOFTMMU_GATE 1
+#else
+# define CONFIG_SOFTMMU_GATE 0
+#endif
+
+/*
+ * plugin_cb_start TCG op args[]:
+ * 0: enum plugin_gen_from
+ * 1: enum plugin_gen_cb
+ * 2: set to 1 for mem callback that is a write, 0 otherwise.
+ */
+
+enum plugin_gen_from {
+    PLUGIN_GEN_FROM_TB,
+    PLUGIN_GEN_FROM_INSN,
+    PLUGIN_GEN_FROM_MEM,
+    PLUGIN_GEN_AFTER_INSN,
+    PLUGIN_GEN_N_FROMS,
+};
+
+enum plugin_gen_cb {
+    PLUGIN_GEN_CB_UDATA,
+    PLUGIN_GEN_CB_INLINE,
+    PLUGIN_GEN_CB_MEM,
+    PLUGIN_GEN_ENABLE_MEM_HELPER,
+    PLUGIN_GEN_DISABLE_MEM_HELPER,
+    PLUGIN_GEN_N_CBS,
+};
+
+/*
+ * These helpers are stubs that get dynamically switched out for calls
+ * direct to the plugin if they are subscribed to.
+ */
+void HELPER(plugin_vcpu_udata_cb)(uint32_t cpu_index, void *udata)
+{ }
+
+void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
+                                qemu_plugin_meminfo_t info, uint64_t vaddr,
+                                void *userdata)
+{ }
+
+static void do_gen_mem_cb(TCGv vaddr, uint32_t info)
+{
+    TCGv_i32 cpu_index = tcg_temp_new_i32();
+    TCGv_i32 meminfo = tcg_const_i32(info);
+    TCGv_i64 vaddr64 = tcg_temp_new_i64();
+    TCGv_ptr udata = tcg_const_ptr(NULL);
+
+    tcg_gen_ld_i32(cpu_index, cpu_env,
+                   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+    tcg_gen_extu_tl_i64(vaddr64, vaddr);
+
+    gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, vaddr64, udata);
+
+    tcg_temp_free_ptr(udata);
+    tcg_temp_free_i64(vaddr64);
+    tcg_temp_free_i32(meminfo);
+    tcg_temp_free_i32(cpu_index);
+}
+
+static void gen_empty_udata_cb(void)
+{
+    TCGv_i32 cpu_index = tcg_temp_new_i32();
+    TCGv_ptr udata = tcg_const_ptr(NULL); /* will be overwritten later */
+
+    tcg_gen_ld_i32(cpu_index, cpu_env,
+                   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+    gen_helper_plugin_vcpu_udata_cb(cpu_index, udata);
+
+    tcg_temp_free_ptr(udata);
+    tcg_temp_free_i32(cpu_index);
+}
+
+/*
+ * For now we only support addi_i64.
+ * When we support more ops, we can generate one empty inline cb for each.
+ */
+static void gen_empty_inline_cb(void)
+{
+    TCGv_i64 val = tcg_temp_new_i64();
+    TCGv_ptr ptr = tcg_const_ptr(NULL); /* overwritten later */
+
+    tcg_gen_ld_i64(val, ptr, 0);
+    /* pass an immediate != 0 so that it doesn't get optimized away */
+    tcg_gen_addi_i64(val, val, 0xdeadface);
+    tcg_gen_st_i64(val, ptr, 0);
+    tcg_temp_free_ptr(ptr);
+    tcg_temp_free_i64(val);
+}
+
+static void gen_empty_mem_cb(TCGv addr, uint32_t info)
+{
+    do_gen_mem_cb(addr, info);
+}
+
+/*
+ * Share the same function for enable/disable. When enabling, the NULL
+ * pointer will be overwritten later.
+ */
+static void gen_empty_mem_helper(void)
+{
+    TCGv_ptr ptr;
+
+    ptr = tcg_const_ptr(NULL);
+    tcg_gen_st_ptr(ptr, cpu_env, offsetof(CPUState, plugin_mem_cbs) -
+                                 offsetof(ArchCPU, env));
+    tcg_temp_free_ptr(ptr);
+}
+
+static inline
+void gen_plugin_cb_start(enum plugin_gen_from from,
+                         enum plugin_gen_cb type, unsigned wr)
+{
+    TCGOp *op;
+
+    tcg_gen_plugin_cb_start(from, type, wr);
+    op = tcg_last_op();
+    QSIMPLEQ_INSERT_TAIL(&tcg_ctx->plugin_ops, op, plugin_link);
+}
+
+static void gen_wrapped(enum plugin_gen_from from,
+                        enum plugin_gen_cb type, void (*func)(void))
+{
+    gen_plugin_cb_start(from, type, 0);
+    func();
+    tcg_gen_plugin_cb_end();
+}
+
+static inline void plugin_gen_empty_callback(enum plugin_gen_from from)
+{
+    switch (from) {
+    case PLUGIN_GEN_AFTER_INSN:
+        gen_wrapped(from, PLUGIN_GEN_DISABLE_MEM_HELPER,
+                    gen_empty_mem_helper);
+        break;
+    case PLUGIN_GEN_FROM_INSN:
+        /*
+         * Note: plugin_gen_inject() relies on ENABLE_MEM_HELPER being
+         * the first callback of an instruction
+         */
+        gen_wrapped(from, PLUGIN_GEN_ENABLE_MEM_HELPER,
+                    gen_empty_mem_helper);
+        /* fall through */
+    case PLUGIN_GEN_FROM_TB:
+        gen_wrapped(from, PLUGIN_GEN_CB_UDATA, gen_empty_udata_cb);
+        gen_wrapped(from, PLUGIN_GEN_CB_INLINE, gen_empty_inline_cb);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+union mem_gen_fn {
+    void (*mem_fn)(TCGv, uint32_t);
+    void (*inline_fn)(void);
+};
+
+static void gen_mem_wrapped(enum plugin_gen_cb type,
+                            const union mem_gen_fn *f, TCGv addr,
+                            uint32_t info, bool is_mem)
+{
+    int wr = !!(info & TRACE_MEM_ST);
+
+    gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, wr);
+    if (is_mem) {
+        f->mem_fn(addr, info);
+    } else {
+        f->inline_fn();
+    }
+    tcg_gen_plugin_cb_end();
+}
+
+void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
+{
+    union mem_gen_fn fn;
+
+    fn.mem_fn = gen_empty_mem_cb;
+    gen_mem_wrapped(PLUGIN_GEN_CB_MEM, &fn, addr, info, true);
+
+    fn.inline_fn = gen_empty_inline_cb;
+    gen_mem_wrapped(PLUGIN_GEN_CB_INLINE, &fn, 0, info, false);
+}
+
+static TCGOp *find_op(TCGOp *op, TCGOpcode opc)
+{
+    while (op) {
+        if (op->opc == opc) {
+            return op;
+        }
+        op = QTAILQ_NEXT(op, link);
+    }
+    return NULL;
+}
+
+static TCGOp *rm_ops_range(TCGOp *begin, TCGOp *end)
+{
+    TCGOp *ret = QTAILQ_NEXT(end, link);
+
+    QTAILQ_REMOVE_SEVERAL(&tcg_ctx->ops, begin, end, link);
+    return ret;
+}
+
+/* remove all ops until (and including) plugin_cb_end */
+static TCGOp *rm_ops(TCGOp *op)
+{
+    TCGOp *end_op = find_op(op, INDEX_op_plugin_cb_end);
+
+    tcg_debug_assert(end_op);
+    return rm_ops_range(op, end_op);
+}
+
+static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op)
+{
+    *begin_op = QTAILQ_NEXT(*begin_op, link);
+    tcg_debug_assert(*begin_op);
+    op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc);
+    memcpy(op->args, (*begin_op)->args, sizeof(op->args));
+    return op;
+}
+
+static TCGOp *copy_op(TCGOp **begin_op, TCGOp *op, TCGOpcode opc)
+{
+    op = copy_op_nocheck(begin_op, op);
+    tcg_debug_assert((*begin_op)->opc == opc);
+    return op;
+}
+
+static TCGOp *copy_extu_i32_i64(TCGOp **begin_op, TCGOp *op)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* mov_i32 */
+        op = copy_op(begin_op, op, INDEX_op_mov_i32);
+        /* movi_i32 */
+        op = copy_op(begin_op, op, INDEX_op_movi_i32);
+    } else {
+        /* extu_i32_i64 */
+        op = copy_op(begin_op, op, INDEX_op_extu_i32_i64);
+    }
+    return op;
+}
+
+static TCGOp *copy_mov_i64(TCGOp **begin_op, TCGOp *op)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* 2x mov_i32 */
+        op = copy_op(begin_op, op, INDEX_op_mov_i32);
+        op = copy_op(begin_op, op, INDEX_op_mov_i32);
+    } else {
+        /* mov_i64 */
+        op = copy_op(begin_op, op, INDEX_op_mov_i64);
+    }
+    return op;
+}
+
+static TCGOp *copy_movi_i64(TCGOp **begin_op, TCGOp *op, uint64_t v)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* 2x movi_i32 */
+        op = copy_op(begin_op, op, INDEX_op_movi_i32);
+        op->args[1] = v;
+
+        op = copy_op(begin_op, op, INDEX_op_movi_i32);
+        op->args[1] = v >> 32;
+    } else {
+        /* movi_i64 */
+        op = copy_op(begin_op, op, INDEX_op_movi_i64);
+        op->args[1] = v;
+    }
+    return op;
+}
+
+static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr)
+{
+    if (UINTPTR_MAX == UINT32_MAX) {
+        /* movi_i32 */
+        op = copy_op(begin_op, op, INDEX_op_movi_i32);
+        op->args[1] = (uintptr_t)ptr;
+    } else {
+        /* movi_i64 */
+        op = copy_movi_i64(begin_op, op, (uint64_t)(uintptr_t)ptr);
+    }
+    return op;
+}
+
+static TCGOp *copy_const_i64(TCGOp **begin_op, TCGOp *op, uint64_t v)
+{
+    return copy_movi_i64(begin_op, op, v);
+}
+
+static TCGOp *copy_extu_tl_i64(TCGOp **begin_op, TCGOp *op)
+{
+    if (TARGET_LONG_BITS == 32) {
+        /* extu_i32_i64 */
+        op = copy_extu_i32_i64(begin_op, op);
+    } else {
+        /* mov_i64 */
+        op = copy_mov_i64(begin_op, op);
+    }
+    return op;
+}
+
+static TCGOp *copy_ld_i64(TCGOp **begin_op, TCGOp *op)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* 2x ld_i32 */
+        op = copy_op(begin_op, op, INDEX_op_ld_i32);
+        op = copy_op(begin_op, op, INDEX_op_ld_i32);
+    } else {
+        /* ld_i64 */
+        op = copy_op(begin_op, op, INDEX_op_ld_i64);
+    }
+    return op;
+}
+
+static TCGOp *copy_st_i64(TCGOp **begin_op, TCGOp *op)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* 2x st_i32 */
+        op = copy_op(begin_op, op, INDEX_op_st_i32);
+        op = copy_op(begin_op, op, INDEX_op_st_i32);
+    } else {
+        /* st_i64 */
+        op = copy_op(begin_op, op, INDEX_op_st_i64);
+    }
+    return op;
+}
+
+static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* all 32-bit backends must implement add2_i32 */
+        g_assert(TCG_TARGET_HAS_add2_i32);
+        op = copy_op(begin_op, op, INDEX_op_add2_i32);
+    } else {
+        op = copy_op(begin_op, op, INDEX_op_add_i64);
+    }
+    return op;
+}
+
+static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op)
+{
+    if (UINTPTR_MAX == UINT32_MAX) {
+        /* st_i32 */
+        op = copy_op(begin_op, op, INDEX_op_st_i32);
+    } else {
+        /* st_i64 */
+        op = copy_st_i64(begin_op, op);
+    }
+    return op;
+}
+
+static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func,
+                        void *func, unsigned tcg_flags, int *cb_idx)
+{
+    /* copy all ops until the call */
+    do {
+        op = copy_op_nocheck(begin_op, op);
+    } while (op->opc != INDEX_op_call);
+
+    /* fill in the op call */
+    op->param1 = (*begin_op)->param1;
+    op->param2 = (*begin_op)->param2;
+    tcg_debug_assert(op->life == 0);
+    if (*cb_idx == -1) {
+        int i;
+
+        /*
+         * Instead of working out the position of the callback in args[], just
+         * look for @empty_func, since it should be a unique pointer.
+         */
+        for (i = 0; i < MAX_OPC_PARAM_ARGS; i++) {
+            if ((uintptr_t)(*begin_op)->args[i] == (uintptr_t)empty_func) {
+                *cb_idx = i;
+                break;
+            }
+        }
+        tcg_debug_assert(i < MAX_OPC_PARAM_ARGS);
+    }
+    op->args[*cb_idx] = (uintptr_t)func;
+    op->args[*cb_idx + 1] = tcg_flags;
+
+    return op;
+}
+
+static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb,
+                              TCGOp *begin_op, TCGOp *op, int *cb_idx)
+{
+    /* const_ptr */
+    op = copy_const_ptr(&begin_op, op, cb->userp);
+
+    /* copy the ld_i32, but note that we only have to copy it once */
+    begin_op = QTAILQ_NEXT(begin_op, link);
+    tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32);
+    if (*cb_idx == -1) {
+        op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32);
+        memcpy(op->args, begin_op->args, sizeof(op->args));
+    }
+
+    /* call */
+    op = copy_call(&begin_op, op, HELPER(plugin_vcpu_udata_cb),
+                   cb->f.vcpu_udata, cb->tcg_flags, cb_idx);
+
+    return op;
+}
+
+static TCGOp *append_inline_cb(const struct qemu_plugin_dyn_cb *cb,
+                               TCGOp *begin_op, TCGOp *op,
+                               int *unused)
+{
+    /* const_ptr */
+    op = copy_const_ptr(&begin_op, op, cb->userp);
+
+    /* ld_i64 */
+    op = copy_ld_i64(&begin_op, op);
+
+    /* const_i64 */
+    op = copy_const_i64(&begin_op, op, cb->inline_insn.imm);
+
+    /* add_i64 */
+    op = copy_add_i64(&begin_op, op);
+
+    /* st_i64 */
+    op = copy_st_i64(&begin_op, op);
+
+    return op;
+}
+
+static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb,
+                            TCGOp *begin_op, TCGOp *op, int *cb_idx)
+{
+    enum plugin_gen_cb type = begin_op->args[1];
+
+    tcg_debug_assert(type == PLUGIN_GEN_CB_MEM);
+
+    /* const_i32 == movi_i32 ("info", so it remains as is) */
+    op = copy_op(&begin_op, op, INDEX_op_movi_i32);
+
+    /* const_ptr */
+    op = copy_const_ptr(&begin_op, op, cb->userp);
+
+    /* copy the ld_i32, but note that we only have to copy it once */
+    begin_op = QTAILQ_NEXT(begin_op, link);
+    tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32);
+    if (*cb_idx == -1) {
+        op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32);
+        memcpy(op->args, begin_op->args, sizeof(op->args));
+    }
+
+    /* extu_tl_i64 */
+    op = copy_extu_tl_i64(&begin_op, op);
+
+    if (type == PLUGIN_GEN_CB_MEM) {
+        /* call */
+        op = copy_call(&begin_op, op, HELPER(plugin_vcpu_mem_cb),
+                       cb->f.vcpu_udata, cb->tcg_flags, cb_idx);
+    }
+
+    return op;
+}
+
+typedef TCGOp *(*inject_fn)(const struct qemu_plugin_dyn_cb *cb,
+                            TCGOp *begin_op, TCGOp *op, int *intp);
+typedef bool (*op_ok_fn)(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb);
+
+static bool op_ok(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb)
+{
+    return true;
+}
+
+static bool op_rw(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb)
+{
+    int w;
+
+    w = op->args[2];
+    return !!(cb->rw & (w + 1));
+}
+
+static inline
+void inject_cb_type(const GArray *cbs, TCGOp *begin_op, inject_fn inject,
+                    op_ok_fn ok)
+{
+    TCGOp *end_op;
+    TCGOp *op;
+    int cb_idx = -1;
+    int i;
+
+    if (!cbs || cbs->len == 0) {
+        rm_ops(begin_op);
+        return;
+    }
+
+    end_op = find_op(begin_op, INDEX_op_plugin_cb_end);
+    tcg_debug_assert(end_op);
+
+    op = end_op;
+    for (i = 0; i < cbs->len; i++) {
+        struct qemu_plugin_dyn_cb *cb =
+            &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
+
+        if (!ok(begin_op, cb)) {
+            continue;
+        }
+        op = inject(cb, begin_op, op, &cb_idx);
+    }
+    rm_ops_range(begin_op, end_op);
+}
+
+static void
+inject_udata_cb(const GArray *cbs, TCGOp *begin_op)
+{
+    inject_cb_type(cbs, begin_op, append_udata_cb, op_ok);
+}
+
+static void
+inject_inline_cb(const GArray *cbs, TCGOp *begin_op, op_ok_fn ok)
+{
+    inject_cb_type(cbs, begin_op, append_inline_cb, ok);
+}
+
+static void
+inject_mem_cb(const GArray *cbs, TCGOp *begin_op)
+{
+    inject_cb_type(cbs, begin_op, append_mem_cb, op_rw);
+}
+
+/* we could change the ops in place, but we can reuse more code by copying */
+static void inject_mem_helper(TCGOp *begin_op, GArray *arr)
+{
+    TCGOp *orig_op = begin_op;
+    TCGOp *end_op;
+    TCGOp *op;
+
+    end_op = find_op(begin_op, INDEX_op_plugin_cb_end);
+    tcg_debug_assert(end_op);
+
+    /* const ptr */
+    op = copy_const_ptr(&begin_op, end_op, arr);
+
+    /* st_ptr */
+    op = copy_st_ptr(&begin_op, op);
+
+    rm_ops_range(orig_op, end_op);
+}
+
+/*
+ * Tracking memory accesses performed from helpers requires extra work.
+ * If an instruction is emulated with helpers, we do two things:
+ * (1) copy the CB descriptors, and keep track of it so that they can be
+ * freed later on, and (2) point CPUState.plugin_mem_cbs to the descriptors, so
+ * that we can read them at run-time (i.e. when the helper executes).
+ * This run-time access is performed from qemu_plugin_vcpu_mem_cb.
+ *
+ * Note that plugin_gen_disable_mem_helpers undoes (2). Since it
+ * is possible that the code we generate after the instruction is
+ * dead, we also add checks before generating tb_exit etc.
+ */
+static void inject_mem_enable_helper(struct qemu_plugin_insn *plugin_insn,
+                                     TCGOp *begin_op)
+{
+    GArray *cbs[2];
+    GArray *arr;
+    size_t n_cbs, i;
+
+    cbs[0] = plugin_insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR];
+    cbs[1] = plugin_insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE];
+
+    n_cbs = 0;
+    for (i = 0; i < ARRAY_SIZE(cbs); i++) {
+        n_cbs += cbs[i]->len;
+    }
+
+    plugin_insn->mem_helper = plugin_insn->calls_helpers && n_cbs;
+    if (likely(!plugin_insn->mem_helper)) {
+        rm_ops(begin_op);
+        return;
+    }
+
+    arr = g_array_sized_new(false, false,
+                            sizeof(struct qemu_plugin_dyn_cb), n_cbs);
+
+    for (i = 0; i < ARRAY_SIZE(cbs); i++) {
+        g_array_append_vals(arr, cbs[i]->data, cbs[i]->len);
+    }
+
+    qemu_plugin_add_dyn_cb_arr(arr);
+    inject_mem_helper(begin_op, arr);
+}
+
+static void inject_mem_disable_helper(struct qemu_plugin_insn *plugin_insn,
+                                      TCGOp *begin_op)
+{
+    if (likely(!plugin_insn->mem_helper)) {
+        rm_ops(begin_op);
+        return;
+    }
+    inject_mem_helper(begin_op, NULL);
+}
+
+/* called before finishing a TB with exit_tb, goto_tb or goto_ptr */
+void plugin_gen_disable_mem_helpers(void)
+{
+    TCGv_ptr ptr;
+
+    if (likely(tcg_ctx->plugin_insn == NULL ||
+               !tcg_ctx->plugin_insn->mem_helper)) {
+        return;
+    }
+    ptr = tcg_const_ptr(NULL);
+    tcg_gen_st_ptr(ptr, cpu_env, offsetof(CPUState, plugin_mem_cbs) -
+                                 offsetof(ArchCPU, env));
+    tcg_temp_free_ptr(ptr);
+    tcg_ctx->plugin_insn->mem_helper = false;
+}
+
+static void plugin_gen_tb_udata(const struct qemu_plugin_tb *ptb,
+                                TCGOp *begin_op)
+{
+    inject_udata_cb(ptb->cbs[PLUGIN_CB_REGULAR], begin_op);
+}
+
+static void plugin_gen_tb_inline(const struct qemu_plugin_tb *ptb,
+                                 TCGOp *begin_op)
+{
+    inject_inline_cb(ptb->cbs[PLUGIN_CB_INLINE], begin_op, op_ok);
+}
+
+static void plugin_gen_insn_udata(const struct qemu_plugin_tb *ptb,
+                                  TCGOp *begin_op, int insn_idx)
+{
+    struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx);
+
+    inject_udata_cb(insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR], begin_op);
+}
+
+static void plugin_gen_insn_inline(const struct qemu_plugin_tb *ptb,
+                                   TCGOp *begin_op, int insn_idx)
+{
+    struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx);
+    inject_inline_cb(insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
+                     begin_op, op_ok);
+}
+
+static void plugin_gen_mem_regular(const struct qemu_plugin_tb *ptb,
+                                   TCGOp *begin_op, int insn_idx)
+{
+    struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx);
+    inject_mem_cb(insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR], begin_op);
+}
+
+static void plugin_gen_mem_inline(const struct qemu_plugin_tb *ptb,
+                                  TCGOp *begin_op, int insn_idx)
+{
+    const GArray *cbs;
+    struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx);
+
+    cbs = insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE];
+    inject_inline_cb(cbs, begin_op, op_rw);
+}
+
+static void plugin_gen_enable_mem_helper(const struct qemu_plugin_tb *ptb,
+                                         TCGOp *begin_op, int insn_idx)
+{
+    struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx);
+    inject_mem_enable_helper(insn, begin_op);
+}
+
+static void plugin_gen_disable_mem_helper(const struct qemu_plugin_tb *ptb,
+                                          TCGOp *begin_op, int insn_idx)
+{
+    struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx);
+    inject_mem_disable_helper(insn, begin_op);
+}
+
+static void plugin_inject_cb(const struct qemu_plugin_tb *ptb, TCGOp *begin_op,
+                             int insn_idx)
+{
+    enum plugin_gen_from from = begin_op->args[0];
+    enum plugin_gen_cb type = begin_op->args[1];
+
+    switch (from) {
+    case PLUGIN_GEN_FROM_TB:
+        switch (type) {
+        case PLUGIN_GEN_CB_UDATA:
+            plugin_gen_tb_udata(ptb, begin_op);
+            return;
+        case PLUGIN_GEN_CB_INLINE:
+            plugin_gen_tb_inline(ptb, begin_op);
+            return;
+        default:
+            g_assert_not_reached();
+        }
+    case PLUGIN_GEN_FROM_INSN:
+        switch (type) {
+        case PLUGIN_GEN_CB_UDATA:
+            plugin_gen_insn_udata(ptb, begin_op, insn_idx);
+            return;
+        case PLUGIN_GEN_CB_INLINE:
+            plugin_gen_insn_inline(ptb, begin_op, insn_idx);
+            return;
+        case PLUGIN_GEN_ENABLE_MEM_HELPER:
+            plugin_gen_enable_mem_helper(ptb, begin_op, insn_idx);
+            return;
+        default:
+            g_assert_not_reached();
+        }
+    case PLUGIN_GEN_FROM_MEM:
+        switch (type) {
+        case PLUGIN_GEN_CB_MEM:
+            plugin_gen_mem_regular(ptb, begin_op, insn_idx);
+            return;
+        case PLUGIN_GEN_CB_INLINE:
+            plugin_gen_mem_inline(ptb, begin_op, insn_idx);
+            return;
+        default:
+            g_assert_not_reached();
+        }
+    case PLUGIN_GEN_AFTER_INSN:
+        switch (type) {
+        case PLUGIN_GEN_DISABLE_MEM_HELPER:
+            plugin_gen_disable_mem_helper(ptb, begin_op, insn_idx);
+            return;
+        default:
+            g_assert_not_reached();
+        }
+    default:
+        g_assert_not_reached();
+    }
+}
+
+/* #define DEBUG_PLUGIN_GEN_OPS */
+static void pr_ops(void)
+{
+#ifdef DEBUG_PLUGIN_GEN_OPS
+    TCGOp *op;
+    int i = 0;
+
+    QTAILQ_FOREACH(op, &tcg_ctx->ops, link) {
+        const char *name = "";
+        const char *type = "";
+
+        if (op->opc == INDEX_op_plugin_cb_start) {
+            switch (op->args[0]) {
+            case PLUGIN_GEN_FROM_TB:
+                name = "tb";
+                break;
+            case PLUGIN_GEN_FROM_INSN:
+                name = "insn";
+                break;
+            case PLUGIN_GEN_FROM_MEM:
+                name = "mem";
+                break;
+            case PLUGIN_GEN_AFTER_INSN:
+                name = "after insn";
+                break;
+            default:
+                break;
+            }
+            switch (op->args[1]) {
+            case PLUGIN_GEN_CB_UDATA:
+                type = "udata";
+                break;
+            case PLUGIN_GEN_CB_INLINE:
+                type = "inline";
+                break;
+            case PLUGIN_GEN_CB_MEM:
+                type = "mem";
+                break;
+            case PLUGIN_GEN_ENABLE_MEM_HELPER:
+                type = "enable mem helper";
+                break;
+            case PLUGIN_GEN_DISABLE_MEM_HELPER:
+                type = "disable mem helper";
+                break;
+            default:
+                break;
+            }
+        }
+        printf("op[%2i]: %s %s %s\n", i, tcg_op_defs[op->opc].name, name, type);
+        i++;
+    }
+#endif
+}
+
+static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb)
+{
+    TCGOp *op;
+    int insn_idx;
+
+    pr_ops();
+    insn_idx = -1;
+    QSIMPLEQ_FOREACH(op, &tcg_ctx->plugin_ops, plugin_link) {
+        enum plugin_gen_from from = op->args[0];
+        enum plugin_gen_cb type = op->args[1];
+
+        tcg_debug_assert(op->opc == INDEX_op_plugin_cb_start);
+        /* ENABLE_MEM_HELPER is the first callback of an instruction */
+        if (from == PLUGIN_GEN_FROM_INSN &&
+            type == PLUGIN_GEN_ENABLE_MEM_HELPER) {
+            insn_idx++;
+        }
+        plugin_inject_cb(plugin_tb, op, insn_idx);
+    }
+    pr_ops();
+}
+
+bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
+{
+    struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
+    bool ret = false;
+
+    if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_mask)) {
+        ret = true;
+
+        QSIMPLEQ_INIT(&tcg_ctx->plugin_ops);
+        ptb->vaddr = tb->pc;
+        ptb->vaddr2 = -1;
+        get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
+        ptb->haddr2 = NULL;
+
+        plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
+    }
+    return ret;
+}
+
+void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db)
+{
+    struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
+    struct qemu_plugin_insn *pinsn;
+
+    pinsn = qemu_plugin_tb_insn_get(ptb);
+    tcg_ctx->plugin_insn = pinsn;
+    pinsn->vaddr = db->pc_next;
+    plugin_gen_empty_callback(PLUGIN_GEN_FROM_INSN);
+
+    /*
+     * Detect page crossing to get the new host address.
+     * Note that we skip this when haddr1 == NULL, e.g. when we're
+     * fetching instructions from a region not backed by RAM.
+     */
+    if (likely(ptb->haddr1 != NULL && ptb->vaddr2 == -1) &&
+        unlikely((db->pc_next & TARGET_PAGE_MASK) !=
+                 (db->pc_first & TARGET_PAGE_MASK))) {
+        get_page_addr_code_hostp(cpu->env_ptr, db->pc_next,
+                                 &ptb->haddr2);
+        ptb->vaddr2 = db->pc_next;
+    }
+    if (likely(ptb->vaddr2 == -1)) {
+        pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr;
+    } else {
+        pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2;
+    }
+}
+
+void plugin_gen_insn_end(void)
+{
+    plugin_gen_empty_callback(PLUGIN_GEN_AFTER_INSN);
+}
+
+void plugin_gen_tb_end(CPUState *cpu)
+{
+    struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
+    int i;
+
+    /* collect instrumentation requests */
+    qemu_plugin_tb_trans_cb(cpu, ptb);
+
+    /* inject the instrumentation at the appropriate places */
+    plugin_gen_inject(ptb);
+
+    /* clean up */
+    for (i = 0; i < PLUGIN_N_CB_SUBTYPES; i++) {
+        if (ptb->cbs[i]) {
+            g_array_set_size(ptb->cbs[i], 0);
+        }
+    }
+    ptb->n = 0;
+    tcg_ctx->plugin_insn = NULL;
+}
diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h
new file mode 100644
index 0000000000..1916ee7920
--- /dev/null
+++ b/accel/tcg/plugin-helpers.h
@@ -0,0 +1,5 @@
+#ifdef CONFIG_PLUGIN
+/* Note: no TCG flags because those are overwritten later */
+DEF_HELPER_2(plugin_vcpu_udata_cb, void, i32, ptr)
+DEF_HELPER_4(plugin_vcpu_mem_cb, void, i32, i32, i64, ptr)
+#endif
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index ae063b53f9..9f48da9472 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1214,6 +1214,8 @@ static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
 /* flush all the translation blocks */
 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
 {
+    bool did_flush = false;
+
     mmap_lock();
     /* If it is already been done on request of another CPU,
      * just retry.
@@ -1221,6 +1223,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
         goto done;
     }
+    did_flush = true;
 
     if (DEBUG_TB_FLUSH_GATE) {
         size_t nb_tbs = tcg_nb_tbs();
@@ -1245,14 +1248,22 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
 
 done:
     mmap_unlock();
+    if (did_flush) {
+        qemu_plugin_flush_cb();
+    }
 }
 
 void tb_flush(CPUState *cpu)
 {
     if (tcg_enabled()) {
         unsigned tb_flush_count = atomic_mb_read(&tb_ctx.tb_flush_count);
-        async_safe_run_on_cpu(cpu, do_tb_flush,
-                              RUN_ON_CPU_HOST_INT(tb_flush_count));
+
+        if (cpu_in_exclusive_context(cpu)) {
+            do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
+        } else {
+            async_safe_run_on_cpu(cpu, do_tb_flush,
+                                  RUN_ON_CPU_HOST_INT(tb_flush_count));
+        }
     }
 }
 
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 70c66c538c..f977682be7 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -16,6 +16,7 @@
 #include "exec/gen-icount.h"
 #include "exec/log.h"
 #include "exec/translator.h"
+#include "exec/plugin-gen.h"
 
 /* Pairs with tcg_clear_temp_count.
    To be called by #TranslatorOps.{translate_insn,tb_stop} if
@@ -34,6 +35,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
                      CPUState *cpu, TranslationBlock *tb, int max_insns)
 {
     int bp_insn = 0;
+    bool plugin_enabled;
 
     /* Initialize DisasContext */
     db->tb = tb;
@@ -55,11 +57,17 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
     ops->tb_start(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
 
+    plugin_enabled = plugin_gen_tb_start(cpu, tb);
+
     while (true) {
         db->num_insns++;
         ops->insn_start(db, cpu);
         tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
 
+        if (plugin_enabled) {
+            plugin_gen_insn_start(cpu, db);
+        }
+
         /* Pass breakpoint hits to target for further processing */
         if (!db->singlestep_enabled
             && unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) {
@@ -99,6 +107,14 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
             break;
         }
 
+        /*
+         * We can't instrument after instructions that change control
+         * flow although this only really affects post-load operations.
+         */
+        if (plugin_enabled) {
+            plugin_gen_insn_end();
+        }
+
         /* Stop translation if the output buffer is full,
            or we have executed all of the allowed instructions.  */
         if (tcg_op_buf_full() || db->num_insns >= db->max_insns) {
@@ -111,6 +127,10 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
     ops->tb_stop(db, cpu);
     gen_tb_end(db->tb, db->num_insns - bp_insn);
 
+    if (plugin_enabled) {
+        plugin_gen_tb_end(cpu);
+    }
+
     /* The disas_log hook may use these values rather than recompute.  */
     db->tb->size = db->pc_next - db->pc_first;
     db->tb->icount = db->num_insns;
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 71c4bf6477..b09f7a1577 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -751,10 +751,13 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 #define ATOMIC_MMU_DECLS do {} while (0)
 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC())
 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
+#define ATOMIC_MMU_IDX MMU_USER_IDX
 
 #define ATOMIC_NAME(X)   HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
 #define EXTRA_ARGS
 
+#include "atomic_common.inc.c"
+
 #define DATA_SIZE 1
 #include "atomic_template.h"
 
diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c
index 1ee6195d9f..0d45b654bb 100644
--- a/bsd-user/syscall.c
+++ b/bsd-user/syscall.c
@@ -26,6 +26,7 @@
 
 #include "qemu.h"
 #include "qemu-common.h"
+#include "user/syscall-trace.h"
 
 //#define DEBUG
 
@@ -322,7 +323,8 @@ abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1,
 #ifdef DEBUG
     gemu_log("freebsd syscall %d\n", num);
 #endif
-    trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8);
+    record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0);
+
     if(do_strace)
         print_freebsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6);
 
@@ -332,6 +334,7 @@ abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        qemu_plugin_atexit_cb();
         /* XXX: should free thread stack and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
@@ -402,7 +405,8 @@ abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
     if (do_strace)
         print_freebsd_syscall_ret(num, ret);
-    trace_guest_user_syscall_ret(cpu, num, ret);
+
+    record_syscall_return(cpu, num, ret);
     return ret;
  efault:
     ret = -TARGET_EFAULT;
@@ -420,7 +424,9 @@ abi_long do_netbsd_syscall(void *cpu_env, int num, abi_long arg1,
 #ifdef DEBUG
     gemu_log("netbsd syscall %d\n", num);
 #endif
-    trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0);
+
+    record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0);
+
     if(do_strace)
         print_netbsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6);
 
@@ -430,6 +436,7 @@ abi_long do_netbsd_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        qemu_plugin_atexit_cb();
         /* XXX: should free thread stack and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
@@ -477,7 +484,8 @@ abi_long do_netbsd_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
     if (do_strace)
         print_netbsd_syscall_ret(num, ret);
-    trace_guest_user_syscall_ret(cpu, num, ret);
+
+    record_syscall_return(cpu, num, ret);
     return ret;
  efault:
     ret = -TARGET_EFAULT;
@@ -495,7 +503,9 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
 #ifdef DEBUG
     gemu_log("openbsd syscall %d\n", num);
 #endif
-    trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0);
+
+    record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0);
+
     if(do_strace)
         print_openbsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6);
 
@@ -505,6 +515,7 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        qemu_plugin_atexit_cb();
         /* XXX: should free thread stack and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
@@ -552,7 +563,8 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
     if (do_strace)
         print_openbsd_syscall_ret(num, ret);
-    trace_guest_user_syscall_ret(cpu, num, ret);
+
+    record_syscall_return(cpu, num, ret);
     return ret;
  efault:
     ret = -TARGET_EFAULT;
diff --git a/configure b/configure
index 3be9e92a24..72553f98ea 100755
--- a/configure
+++ b/configure
@@ -30,6 +30,7 @@ TMPO="${TMPDIR1}/${TMPB}.o"
 TMPCXX="${TMPDIR1}/${TMPB}.cxx"
 TMPE="${TMPDIR1}/${TMPB}.exe"
 TMPMO="${TMPDIR1}/${TMPB}.mo"
+TMPTXT="${TMPDIR1}/${TMPB}.txt"
 
 rm -f config.log
 
@@ -501,6 +502,7 @@ libxml2=""
 debug_mutex="no"
 libpmem=""
 default_devices="yes"
+plugins="no"
 
 supported_cpu="no"
 supported_os="no"
@@ -1532,6 +1534,10 @@ for opt do
   ;;
   --disable-xkbcommon) xkbcommon=no
   ;;
+  --enable-plugins) plugins="yes"
+  ;;
+  --disable-plugins) plugins="no"
+  ;;
   *)
       echo "ERROR: unknown option $opt"
       echo "Try '$0 --help' for more information"
@@ -1713,6 +1719,8 @@ Advanced options (experts only):
   --enable-profiler        profiler support
   --enable-debug-stack-usage
                            track the maximum stack usage of stacks created by qemu_alloc_stack
+  --enable-plugins
+                           enable plugins via shared library loading
 
 Optional features, enabled with --enable-FEATURE and
 disabled with --disable-FEATURE, default is enabled if available:
@@ -3644,6 +3652,9 @@ glib_modules=gthread-2.0
 if test "$modules" = yes; then
     glib_modules="$glib_modules gmodule-export-2.0"
 fi
+if test "$plugins" = yes; then
+    glib_modules="$glib_modules gmodule-2.0"
+fi
 
 # This workaround is required due to a bug in pkg-config file for glib as it
 # doesn't define GLIB_STATIC_COMPILATION for pkg-config --static
@@ -5494,6 +5505,61 @@ if compile_prog "" "" ; then
   atomic64=yes
 fi
 
+#########################################
+# See if --dynamic-list is supported by the linker
+ld_dynamic_list="no"
+if test "$static" = "no" ; then
+    cat > $TMPTXT <<EOF
+{
+  foo;
+};
+EOF
+
+    cat > $TMPC <<EOF
+#include <stdio.h>
+void foo(void);
+
+void foo(void)
+{
+  printf("foo\n");
+}
+
+int main(void)
+{
+  foo();
+  return 0;
+}
+EOF
+
+    if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then
+        ld_dynamic_list="yes"
+    fi
+fi
+
+#########################################
+# See if -exported_symbols_list is supported by the linker
+
+ld_exported_symbols_list="no"
+if test "$static" = "no" ; then
+    cat > $TMPTXT <<EOF
+  _foo
+EOF
+
+    if compile_prog "" "-Wl,-exported_symbols_list,$TMPTXT" ; then
+        ld_exported_symbols_list="yes"
+    fi
+fi
+
+if  test "$plugins" = "yes" &&
+    test "$ld_dynamic_list" = "no" &&
+    test "$ld_exported_symbols_list" = "no" ; then
+  error_exit \
+      "Plugin support requires dynamic linking and specifying a set of symbols " \
+      "that are exported to plugins. Unfortunately your linker doesn't " \
+      "support the flag (--dynamic-list or -exported_symbols_list) used " \
+      "for this purpose. You can't build with --static."
+fi
+
 ########################################
 # See if 16-byte vector operations are supported.
 # Even without a vector unit the compiler may expand these.
@@ -6491,6 +6557,7 @@ echo "capstone          $capstone"
 echo "libpmem support   $libpmem"
 echo "libudev           $libudev"
 echo "default devices   $default_devices"
+echo "plugin support    $plugins"
 
 if test "$supported_cpu" = "no"; then
     echo
@@ -7328,6 +7395,27 @@ if test "$sheepdog" = "yes" ; then
   echo "CONFIG_SHEEPDOG=y" >> $config_host_mak
 fi
 
+if test "$plugins" = "yes" ; then
+    echo "CONFIG_PLUGIN=y" >> $config_host_mak
+    LIBS="-ldl $LIBS"
+    # Copy the export object list to the build dir
+    if test "$ld_dynamic_list" = "yes" ; then
+	echo "CONFIG_HAS_LD_DYNAMIC_LIST=yes" >> $config_host_mak
+	ld_symbols=qemu-plugins-ld.symbols
+	cp "$source_path/plugins/qemu-plugins.symbols" $ld_symbols
+    elif test "$ld_exported_symbols_list" = "yes" ; then
+	echo "CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST=yes" >> $config_host_mak
+	ld64_symbols=qemu-plugins-ld64.symbols
+	echo "# Automatically generated by configure - do not modify" > $ld64_symbols
+	grep 'qemu_' "$source_path/plugins/qemu-plugins.symbols" | sed 's/;//g' | \
+	    sed -E 's/^[[:space:]]*(.*)/_\1/' >> $ld64_symbols
+    else
+	error_exit \
+	    "If \$plugins=yes, either \$ld_dynamic_list or " \
+	    "\$ld_exported_symbols_list should have been set to 'yes'."
+    fi
+fi
+
 if test "$tcg_interpreter" = "yes"; then
   QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 elif test "$ARCH" = "sparc64" ; then
@@ -7874,6 +7962,7 @@ DIRS="$DIRS roms/seabios roms/vgabios"
 LINKS="Makefile"
 LINKS="$LINKS tests/tcg/lm32/Makefile po/Makefile"
 LINKS="$LINKS tests/tcg/Makefile.target tests/fp/Makefile"
+LINKS="$LINKS tests/plugin/Makefile"
 LINKS="$LINKS pc-bios/optionrom/Makefile pc-bios/keymaps"
 LINKS="$LINKS pc-bios/s390-ccw/Makefile"
 LINKS="$LINKS roms/seabios/Makefile roms/vgabios/Makefile"
diff --git a/cpus-common.c b/cpus-common.c
index af3385a296..eaf590cb38 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -200,11 +200,15 @@ void start_exclusive(void)
      * section until end_exclusive resets pending_cpus to 0.
      */
     qemu_mutex_unlock(&qemu_cpu_list_lock);
+
+    current_cpu->in_exclusive_context = true;
 }
 
 /* Finish an exclusive operation.  */
 void end_exclusive(void)
 {
+    current_cpu->in_exclusive_context = false;
+
     qemu_mutex_lock(&qemu_cpu_list_lock);
     atomic_set(&pending_cpus, 0);
     qemu_cond_broadcast(&exclusive_resume);
diff --git a/cpus.c b/cpus.c
index fabbeca6f4..63bda152f5 100644
--- a/cpus.c
+++ b/cpus.c
@@ -45,6 +45,7 @@
 #include "exec/exec-all.h"
 
 #include "qemu/thread.h"
+#include "qemu/plugin.h"
 #include "sysemu/cpus.h"
 #include "sysemu/qtest.h"
 #include "qemu/main-loop.h"
@@ -1264,9 +1265,18 @@ static void qemu_tcg_rr_wait_io_event(void)
 
 static void qemu_wait_io_event(CPUState *cpu)
 {
+    bool slept = false;
+
     while (cpu_thread_is_idle(cpu)) {
+        if (!slept) {
+            slept = true;
+            qemu_plugin_vcpu_idle_cb(cpu);
+        }
         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
     }
+    if (slept) {
+        qemu_plugin_vcpu_resume_cb(cpu);
+    }
 
 #ifdef _WIN32
     /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
diff --git a/disas.c b/disas.c
index 3e2bfa572b..3937da6157 100644
--- a/disas.c
+++ b/disas.c
@@ -418,6 +418,7 @@ static bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
 # define cap_disas_target(i, p, s)  false
 # define cap_disas_host(i, p, s)  false
 # define cap_disas_monitor(i, p, c)  false
+# define cap_disas_plugin(i, p, c) false
 #endif /* CONFIG_CAPSTONE */
 
 /* Disassemble this for me please... (debugging).  */
@@ -475,6 +476,115 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
     }
 }
 
+static __thread GString plugin_disas_output;
+
+static int plugin_printf(FILE *stream, const char *fmt, ...)
+{
+    va_list va;
+    GString *s = &plugin_disas_output;
+    int initial_len = s->len;
+
+    va_start(va, fmt);
+    g_string_append_vprintf(s, fmt, va);
+    va_end(va);
+
+    return s->len - initial_len;
+}
+
+static void plugin_print_address(bfd_vma addr, struct disassemble_info *info)
+{
+    /* does nothing */
+}
+
+
+#ifdef CONFIG_CAPSTONE
+/* Disassemble a single instruction directly into plugin output */
+static
+bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size)
+{
+    uint8_t cap_buf[1024];
+    csh handle;
+    cs_insn *insn;
+    size_t csize = 0;
+    int count;
+    GString *s = &plugin_disas_output;
+
+    if (cap_disas_start(info, &handle) != CS_ERR_OK) {
+        return false;
+    }
+    insn = cap_insn;
+
+    size_t tsize = MIN(sizeof(cap_buf) - csize, size);
+    const uint8_t *cbuf = cap_buf;
+    target_read_memory(pc, cap_buf, tsize, info);
+
+    count = cs_disasm(handle, cbuf, size, 0, 1, &insn);
+
+    if (count) {
+        g_string_printf(s, "%s %s", insn->mnemonic, insn->op_str);
+    } else {
+        g_string_printf(s, "cs_disasm failed");
+    }
+
+    cs_close(&handle);
+    return true;
+}
+#endif
+
+/*
+ * We should only be dissembling one instruction at a time here. If
+ * there is left over it usually indicates the front end has read more
+ * bytes than it needed.
+ */
+char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
+{
+    CPUClass *cc = CPU_GET_CLASS(cpu);
+    int count;
+    CPUDebug s;
+    GString *ds = g_string_set_size(&plugin_disas_output, 0);
+
+    g_assert(ds == &plugin_disas_output);
+
+    INIT_DISASSEMBLE_INFO(s.info, NULL, plugin_printf);
+
+    s.cpu = cpu;
+    s.info.read_memory_func = target_read_memory;
+    s.info.buffer_vma = addr;
+    s.info.buffer_length = size;
+    s.info.print_address_func = plugin_print_address;
+    s.info.cap_arch = -1;
+    s.info.cap_mode = 0;
+    s.info.cap_insn_unit = 4;
+    s.info.cap_insn_split = 4;
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    s.info.endian = BFD_ENDIAN_BIG;
+#else
+    s.info.endian = BFD_ENDIAN_LITTLE;
+#endif
+
+    if (cc->disas_set_info) {
+        cc->disas_set_info(cpu, &s.info);
+    }
+
+    if (s.info.cap_arch >= 0 && cap_disas_plugin(&s.info, addr, size)) {
+        return g_strdup(ds->str);
+    }
+
+    if (s.info.print_insn == NULL) {
+        s.info.print_insn = print_insn_od_target;
+    }
+
+    count = s.info.print_insn(addr, &s.info);
+
+    /* The decoder probably read more than it needed it's not critical */
+    if (count < size) {
+        warn_report("%s: %zu bytes left over", __func__, size - count);
+    }
+
+    return g_strdup(ds->str);
+}
+
 /* Disassemble this for me please... (debugging). */
 void disas(FILE *out, void *code, unsigned long size)
 {
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index 1ec61fcfed..2ff058bae3 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -22,3 +22,4 @@ Contents:
    decodetree
    secure-coding-practices
    tcg
+   plugins
diff --git a/docs/devel/plugins.rst b/docs/devel/plugins.rst
new file mode 100644
index 0000000000..b18fb6729e
--- /dev/null
+++ b/docs/devel/plugins.rst
@@ -0,0 +1,112 @@
+..
+   Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+   Copyright (c) 2019, Linaro Limited
+   Written by Emilio Cota and Alex Bennée
+
+================
+QEMU TCG Plugins
+================
+
+QEMU TCG plugins provide a way for users to run experiments taking
+advantage of the total system control emulation can have over a guest.
+It provides a mechanism for plugins to subscribe to events during
+translation and execution and optionally callback into the plugin
+during these events. TCG plugins are unable to change the system state
+only monitor it passively. However they can do this down to an
+individual instruction granularity including potentially subscribing
+to all load and store operations.
+
+API Stability
+=============
+
+This is a new feature for QEMU and it does allow people to develop
+out-of-tree plugins that can be dynamically linked into a running QEMU
+process. However the project reserves the right to change or break the
+API should it need to do so. The best way to avoid this is to submit
+your plugin upstream so they can be updated if/when the API changes.
+
+
+Exposure of QEMU internals
+--------------------------
+
+The plugin architecture actively avoids leaking implementation details
+about how QEMU's translation works to the plugins. While there are
+conceptions such as translation time and translation blocks the
+details are opaque to plugins. The plugin is able to query select
+details of instructions and system configuration only through the
+exported *qemu_plugin* functions. The types used to describe
+instructions and events are opaque to the plugins themselves.
+
+Usage
+=====
+
+The QEMU binary needs to be compiled for plugin support:
+
+::
+    configure --enable-plugins
+
+Once built a program can be run with multiple plugins loaded each with
+their own arguments:
+
+::
+    $QEMU $OTHER_QEMU_ARGS \
+      -plugin tests/plugin/libhowvec.so,arg=inline,arg=hint \
+      -plugin tests/plugin/libhotblocks.so
+
+Arguments are plugin specific and can be used to modify their
+behaviour. In this case the howvec plugin is being asked to use inline
+ops to count and break down the hint instructions by type.
+
+Plugin Life cycle
+=================
+
+First the plugin is loaded and the public qemu_plugin_install function
+is called. The plugin will then register callbacks for various plugin
+events. Generally plugins will register a handler for the *atexit*
+if they want to dump a summary of collected information once the
+program/system has finished running.
+
+When a registered event occurs the plugin callback is invoked. The
+callbacks may provide additional information. In the case of a
+translation event the plugin has an option to enumerate the
+instructions in a block of instructions and optionally register
+callbacks to some or all instructions when they are executed.
+
+There is also a facility to add an inline event where code to
+increment a counter can be directly inlined with the translation.
+Currently only a simple increment is supported. This is not atomic so
+can miss counts. If you want absolute precision you should use a
+callback which can then ensure atomicity itself.
+
+Finally when QEMU exits all the registered *atexit* callbacks are
+invoked.
+
+Internals
+=========
+
+Locking
+-------
+
+We have to ensure we cannot deadlock, particularly under MTTCG. For
+this we acquire a lock when called from plugin code. We also keep the
+list of callbacks under RCU so that we do not have to hold the lock
+when calling the callbacks. This is also for performance, since some
+callbacks (e.g. memory access callbacks) might be called very
+frequently.
+
+  * A consequence of this is that we keep our own list of CPUs, so that
+    we do not have to worry about locking order wrt cpu_list_lock.
+  * Use a recursive lock, since we can get registration calls from
+    callbacks.
+
+As a result registering/unregistering callbacks is "slow", since it
+takes a lock. But this is very infrequent; we want performance when
+calling (or not calling) callbacks, not when registering them. Using
+RCU is great for this.
+
+We support the uninstallation of a plugin at any time (e.g. from
+plugin callbacks). This allows plugins to remove themselves if they no
+longer want to instrument the code. This operation is asynchronous
+which means callbacks may still occur after the uninstall operation is
+requested. The plugin isn't completely uninstalled until the safe work
+has executed while all vCPUs are quiescent.
diff --git a/exec.c b/exec.c
index 91c8b79656..ffdb518535 100644
--- a/exec.c
+++ b/exec.c
@@ -941,6 +941,8 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
     }
     tlb_init(cpu);
 
+    qemu_plugin_vcpu_init_hook(cpu);
+
 #ifndef CONFIG_USER_ONLY
     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
         vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
diff --git a/hw/core/cpu.c b/hw/core/cpu.c
index 73b1ee34d0..db1a03c6bb 100644
--- a/hw/core/cpu.c
+++ b/hw/core/cpu.c
@@ -32,6 +32,7 @@
 #include "hw/boards.h"
 #include "hw/qdev-properties.h"
 #include "trace-root.h"
+#include "qemu/plugin.h"
 
 CPUInterruptHandler cpu_interrupt_handler;
 
@@ -352,6 +353,7 @@ static void cpu_common_unrealizefn(DeviceState *dev, Error **errp)
     CPUState *cpu = CPU(dev);
     /* NOTE: latest generic point before the cpu is fully unrealized */
     trace_fini_vcpu(cpu);
+    qemu_plugin_vcpu_exit_hook(cpu);
     cpu_exec_unrealizefn(cpu);
 }
 
diff --git a/include/disas/disas.h b/include/disas/disas.h
index ba47e9197c..36c33f6f19 100644
--- a/include/disas/disas.h
+++ b/include/disas/disas.h
@@ -14,6 +14,8 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
 void monitor_disas(Monitor *mon, CPUState *cpu,
                    target_ulong pc, int nb_insn, int is_physical);
 
+char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
+
 /* Look up symbol for debugging purpose.  Returns "" if unknown. */
 const char *lookup_symbol(target_ulong orig_addr);
 #endif
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index be946ba1ce..8c44abefa2 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -214,6 +214,7 @@ typedef struct CPUTLBCommon {
  * Since this is placed within CPUNegativeOffsetState, the smallest
  * negative offsets are at the end of the struct.
  */
+
 typedef struct CPUTLB {
     CPUTLBCommon c;
     CPUTLBDesc d[NB_MMU_MODES];
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 9151fdb042..fd499f7e2f 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -129,6 +129,11 @@ static inline void clear_helper_retaddr(void)
 #include "exec/cpu_ldst_useronly_template.h"
 #undef MEMSUFFIX
 
+/*
+ * Code access is deprecated in favour of translator_ld* functions
+ * (see translator.h). However there are still users that need to
+ * converted so for now these stay.
+ */
 #define MEMSUFFIX _code
 #define CODE_ACCESS
 #define DATA_SIZE 1
@@ -427,6 +432,12 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
 #undef CPU_MMU_INDEX
 #undef MEMSUFFIX
 
+/*
+ * Code access is deprecated in favour of translator_ld* functions
+ * (see translator.h). However there are still users that need to
+ * converted so for now these stay.
+ */
+
 #define CPU_MMU_INDEX (cpu_mmu_index(env, true))
 #define MEMSUFFIX _code
 #define SOFTMMU_CODE_ACCESS
diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h
index 3d24ed9bd0..54b5e858ce 100644
--- a/include/exec/cpu_ldst_template.h
+++ b/include/exec/cpu_ldst_template.h
@@ -28,6 +28,7 @@
 #include "trace-root.h"
 #endif
 
+#include "qemu/plugin.h"
 #include "trace/mem.h"
 
 #if DATA_SIZE == 8
@@ -84,17 +85,14 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
     CPUTLBEntry *entry;
     RES_TYPE res;
     target_ulong addr;
-    int mmu_idx;
+    int mmu_idx = CPU_MMU_INDEX;
     TCGMemOpIdx oi;
-
 #if !defined(SOFTMMU_CODE_ACCESS)
-    trace_guest_mem_before_exec(
-        env_cpu(env), ptr,
-        trace_mem_build_info(SHIFT, false, MO_TE, false));
+    uint16_t meminfo = trace_mem_build_info(SHIFT, false, MO_TE, false, mmu_idx);
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
 #endif
 
     addr = ptr;
-    mmu_idx = CPU_MMU_INDEX;
     entry = tlb_entry(env, mmu_idx, addr);
     if (unlikely(entry->ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
@@ -105,6 +103,9 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
         uintptr_t hostaddr = addr + entry->addend;
         res = glue(glue(ld, USUFFIX), _p)((uint8_t *)hostaddr);
     }
+#ifndef SOFTMMU_CODE_ACCESS
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+#endif
     return res;
 }
 
@@ -123,17 +124,14 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
     CPUTLBEntry *entry;
     int res;
     target_ulong addr;
-    int mmu_idx;
+    int mmu_idx = CPU_MMU_INDEX;
     TCGMemOpIdx oi;
-
 #if !defined(SOFTMMU_CODE_ACCESS)
-    trace_guest_mem_before_exec(
-        env_cpu(env), ptr,
-        trace_mem_build_info(SHIFT, true, MO_TE, false));
+    uint16_t meminfo = trace_mem_build_info(SHIFT, true, MO_TE, false, mmu_idx);
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
 #endif
 
     addr = ptr;
-    mmu_idx = CPU_MMU_INDEX;
     entry = tlb_entry(env, mmu_idx, addr);
     if (unlikely(entry->ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
@@ -144,6 +142,9 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
         uintptr_t hostaddr = addr + entry->addend;
         res = glue(glue(lds, SUFFIX), _p)((uint8_t *)hostaddr);
     }
+#ifndef SOFTMMU_CODE_ACCESS
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+#endif
     return res;
 }
 
@@ -165,17 +166,14 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
 {
     CPUTLBEntry *entry;
     target_ulong addr;
-    int mmu_idx;
+    int mmu_idx = CPU_MMU_INDEX;
     TCGMemOpIdx oi;
-
 #if !defined(SOFTMMU_CODE_ACCESS)
-    trace_guest_mem_before_exec(
-        env_cpu(env), ptr,
-        trace_mem_build_info(SHIFT, false, MO_TE, true));
+    uint16_t meminfo = trace_mem_build_info(SHIFT, false, MO_TE, true, mmu_idx);
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
 #endif
 
     addr = ptr;
-    mmu_idx = CPU_MMU_INDEX;
     entry = tlb_entry(env, mmu_idx, addr);
     if (unlikely(tlb_addr_write(entry) !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
@@ -186,6 +184,9 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
         uintptr_t hostaddr = addr + entry->addend;
         glue(glue(st, SUFFIX), _p)((uint8_t *)hostaddr, v);
     }
+#ifndef SOFTMMU_CODE_ACCESS
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
+#endif
 }
 
 static inline void
diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h
index 2378f2958c..dbdc7a845d 100644
--- a/include/exec/cpu_ldst_useronly_template.h
+++ b/include/exec/cpu_ldst_useronly_template.h
@@ -64,18 +64,18 @@
 static inline RES_TYPE
 glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr)
 {
-#ifdef CODE_ACCESS
     RES_TYPE ret;
+#ifdef CODE_ACCESS
     set_helper_retaddr(1);
     ret = glue(glue(ld, USUFFIX), _p)(g2h(ptr));
     clear_helper_retaddr();
-    return ret;
 #else
-    trace_guest_mem_before_exec(
-        env_cpu(env), ptr,
-        trace_mem_build_info(SHIFT, false, MO_TE, false));
-    return glue(glue(ld, USUFFIX), _p)(g2h(ptr));
+    uint16_t meminfo = trace_mem_build_info(SHIFT, false, MO_TE, false,
+                                            MMU_USER_IDX);
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
+    ret = glue(glue(ld, USUFFIX), _p)(g2h(ptr));
 #endif
+    return ret;
 }
 
 #ifndef CODE_ACCESS
@@ -96,18 +96,19 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
 static inline int
 glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr)
 {
-#ifdef CODE_ACCESS
     int ret;
+#ifdef CODE_ACCESS
     set_helper_retaddr(1);
     ret = glue(glue(lds, SUFFIX), _p)(g2h(ptr));
     clear_helper_retaddr();
-    return ret;
 #else
-    trace_guest_mem_before_exec(
-        env_cpu(env), ptr,
-        trace_mem_build_info(SHIFT, true, MO_TE, false));
-    return glue(glue(lds, SUFFIX), _p)(g2h(ptr));
+    uint16_t meminfo = trace_mem_build_info(SHIFT, true, MO_TE, false,
+                                            MMU_USER_IDX);
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
+    ret = glue(glue(lds, SUFFIX), _p)(g2h(ptr));
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
 #endif
+    return ret;
 }
 
 #ifndef CODE_ACCESS
@@ -130,10 +131,11 @@ static inline void
 glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr,
                                       RES_TYPE v)
 {
-    trace_guest_mem_before_exec(
-        env_cpu(env), ptr,
-        trace_mem_build_info(SHIFT, false, MO_TE, true));
+    uint16_t meminfo = trace_mem_build_info(SHIFT, false, MO_TE, true,
+                                            MMU_USER_IDX);
+    trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
     glue(glue(st, SUFFIX), _p)(g2h(ptr), v);
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo);
 }
 
 static inline void
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 04795c49bf..d85e610e85 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -22,6 +22,9 @@
 
 #include "cpu.h"
 #include "exec/tb-context.h"
+#ifdef CONFIG_TCG
+#include "exec/cpu_ldst.h"
+#endif
 #include "sysemu/cpus.h"
 
 /* allow to see translation results - the slowdown should be negligible, so we leave it */
@@ -504,16 +507,71 @@ void mmap_lock(void);
 void mmap_unlock(void);
 bool have_mmap_lock(void);
 
-static inline tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
+/**
+ * get_page_addr_code() - user-mode version
+ * @env: CPUArchState
+ * @addr: guest virtual address of guest code
+ *
+ * Returns @addr.
+ */
+static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
+                                                target_ulong addr)
 {
     return addr;
 }
+
+/**
+ * get_page_addr_code_hostp() - user-mode version
+ * @env: CPUArchState
+ * @addr: guest virtual address of guest code
+ *
+ * Returns @addr.
+ *
+ * If @hostp is non-NULL, sets *@hostp to the host address where @addr's content
+ * is kept.
+ */
+static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env,
+                                                      target_ulong addr,
+                                                      void **hostp)
+{
+    if (hostp) {
+        *hostp = g2h(addr);
+    }
+    return addr;
+}
 #else
 static inline void mmap_lock(void) {}
 static inline void mmap_unlock(void) {}
 
-/* cputlb.c */
-tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr);
+/**
+ * get_page_addr_code() - full-system version
+ * @env: CPUArchState
+ * @addr: guest virtual address of guest code
+ *
+ * If we cannot translate and execute from the entire RAM page, or if
+ * the region is not backed by RAM, returns -1. Otherwise, returns the
+ * ram_addr_t corresponding to the guest code at @addr.
+ *
+ * Note: this function can trigger an exception.
+ */
+tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr);
+
+/**
+ * get_page_addr_code_hostp() - full-system version
+ * @env: CPUArchState
+ * @addr: guest virtual address of guest code
+ *
+ * See get_page_addr_code() (full-system version) for documentation on the
+ * return value.
+ *
+ * Sets *@hostp (when @hostp is non-NULL) as follows.
+ * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
+ * to the host address where @addr's content is kept.
+ *
+ * Note: this function can trigger an exception.
+ */
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+                                        void **hostp);
 
 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h
index 22381a1708..236ff40524 100644
--- a/include/exec/helper-gen.h
+++ b/include/exec/helper-gen.h
@@ -70,6 +70,7 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
 #include "trace/generated-helpers.h"
 #include "trace/generated-helpers-wrappers.h"
 #include "tcg-runtime.h"
+#include "plugin-helpers.h"
 
 #undef DEF_HELPER_FLAGS_0
 #undef DEF_HELPER_FLAGS_1
diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
index 74943edb13..1c4ba9bc78 100644
--- a/include/exec/helper-proto.h
+++ b/include/exec/helper-proto.h
@@ -33,6 +33,7 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
 #include "helper.h"
 #include "trace/generated-helpers.h"
 #include "tcg-runtime.h"
+#include "plugin-helpers.h"
 
 #undef DEF_HELPER_FLAGS_0
 #undef DEF_HELPER_FLAGS_1
diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h
index 268e0f804b..573c2ce2e9 100644
--- a/include/exec/helper-tcg.h
+++ b/include/exec/helper-tcg.h
@@ -55,6 +55,7 @@
 #include "helper.h"
 #include "trace/generated-helpers.h"
 #include "tcg-runtime.h"
+#include "plugin-helpers.h"
 
 #undef str
 #undef DEF_HELPER_FLAGS_0
diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
new file mode 100644
index 0000000000..4834a9e2f4
--- /dev/null
+++ b/include/exec/plugin-gen.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * plugin-gen.h - TCG-dependent definitions for generating plugin code
+ *
+ * This header should be included only from plugin.c and C files that emit
+ * TCG code.
+ */
+#ifndef QEMU_PLUGIN_GEN_H
+#define QEMU_PLUGIN_GEN_H
+
+#include "qemu/plugin.h"
+#include "tcg/tcg.h"
+
+struct DisasContextBase;
+
+#ifdef CONFIG_PLUGIN
+
+bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb);
+void plugin_gen_tb_end(CPUState *cpu);
+void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
+void plugin_gen_insn_end(void);
+
+void plugin_gen_disable_mem_helpers(void);
+void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info);
+
+static inline void plugin_insn_append(const void *from, size_t size)
+{
+    struct qemu_plugin_insn *insn = tcg_ctx->plugin_insn;
+
+    if (insn == NULL) {
+        return;
+    }
+
+    insn->data = g_byte_array_append(insn->data, from, size);
+}
+
+#else /* !CONFIG_PLUGIN */
+
+static inline
+bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
+{
+    return false;
+}
+
+static inline
+void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db)
+{ }
+
+static inline void plugin_gen_insn_end(void)
+{ }
+
+static inline void plugin_gen_tb_end(CPUState *cpu)
+{ }
+
+static inline void plugin_gen_disable_mem_helpers(void)
+{ }
+
+static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
+{ }
+
+static inline void plugin_insn_append(const void *from, size_t size)
+{ }
+
+#endif /* CONFIG_PLUGIN */
+
+#endif /* QEMU_PLUGIN_GEN_H */
+
diff --git a/include/exec/translator.h b/include/exec/translator.h
index 180c51d509..459dd72aab 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -19,7 +19,10 @@
  */
 
 
+#include "qemu/bswap.h"
 #include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "exec/plugin-gen.h"
 #include "tcg/tcg.h"
 
 
@@ -142,4 +145,61 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
 
 void translator_loop_temp_check(DisasContextBase *db);
 
-#endif /* EXEC__TRANSLATOR_H */
+/*
+ * Translator Load Functions
+ *
+ * These are intended to replace the old cpu_ld*_code functions and
+ * are mandatory for front-ends that have been migrated to the common
+ * translator_loop. These functions are only intended to be called
+ * from the translation stage and should not be called from helper
+ * functions. Those functions should be converted to encode the
+ * relevant information at translation time.
+ */
+
+#ifdef CONFIG_USER_ONLY
+
+#define DO_LOAD(type, name, shift)               \
+    do {                                         \
+        set_helper_retaddr(1);                   \
+        ret = name ## _p(g2h(pc));               \
+        clear_helper_retaddr();                  \
+    } while (0)
+
+#else
+
+#define DO_LOAD(type, name, shift)                          \
+    do {                                                    \
+        int mmu_idx = cpu_mmu_index(env, true);             \
+        TCGMemOpIdx oi = make_memop_idx(shift, mmu_idx);    \
+        ret = helper_ret_ ## name ## _cmmu(env, pc, oi, 0); \
+    } while (0)
+
+#endif
+
+#define GEN_TRANSLATOR_LD(fullname, name, type, shift, swap_fn)         \
+    static inline type                                                  \
+    fullname ## _swap(CPUArchState *env, abi_ptr pc, bool do_swap)      \
+    {                                                                   \
+        type ret;                                                       \
+        DO_LOAD(type, name, shift);                                     \
+                                                                        \
+        if (do_swap) {                                                  \
+            ret = swap_fn(ret);                                         \
+        }                                                               \
+        plugin_insn_append(&ret, sizeof(ret));                          \
+        return ret;                                                     \
+    }                                                                   \
+                                                                        \
+    static inline type fullname(CPUArchState *env, abi_ptr pc)          \
+    {                                                                   \
+        return fullname ## _swap(env, pc, false);                       \
+    }
+
+GEN_TRANSLATOR_LD(translator_ldub, ldub, uint8_t, 0, /* no swap */ )
+GEN_TRANSLATOR_LD(translator_ldsw, ldsw, int16_t, 1, bswap16)
+GEN_TRANSLATOR_LD(translator_lduw, lduw, uint16_t, 1, bswap16)
+GEN_TRANSLATOR_LD(translator_ldl, ldl, uint32_t, 2, bswap32)
+GEN_TRANSLATOR_LD(translator_ldq, ldq, uint64_t, 3, bswap64)
+#undef GEN_TRANSLATOR_LD
+
+#endif  /* EXEC__TRANSLATOR_H */
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 031f587e51..e1c383ba84 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -29,6 +29,7 @@
 #include "qemu/rcu_queue.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
+#include "qemu/plugin.h"
 
 typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
                                      void *opaque);
@@ -344,6 +345,7 @@ struct qemu_work_item;
  * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes
  *                        to @trace_dstate).
  * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
+ * @plugin_mask: Plugin event bitmap. Modified only via async work.
  * @ignore_memory_transaction_failures: Cached copy of the MachineState
  *    flag of the same name: allows the board to suppress calling of the
  *    CPU do_transaction_failed hook function.
@@ -372,6 +374,7 @@ struct CPUState {
     bool unplug;
     bool crash_occurred;
     bool exit_request;
+    bool in_exclusive_context;
     uint32_t cflags_next_tb;
     /* updates protected by BQL */
     uint32_t interrupt_request;
@@ -427,6 +430,10 @@ struct CPUState {
     DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
     DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
 
+    DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX);
+
+    GArray *plugin_mem_cbs;
+
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index;
     int cluster_index;
@@ -783,6 +790,18 @@ void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
  */
 void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data);
 
+/**
+ * cpu_in_exclusive_context()
+ * @cpu: The vCPU to check
+ *
+ * Returns true if @cpu is an exclusive context, for example running
+ * something which has previously been queued via async_safe_run_on_cpu().
+ */
+static inline bool cpu_in_exclusive_context(const CPUState *cpu)
+{
+    return cpu->in_exclusive_context;
+}
+
 /**
  * qemu_get_cpu:
  * @index: The CPUState@cpu_index value of the CPU to obtain.
diff --git a/include/qemu/log.h b/include/qemu/log.h
index b097a6cae1..a91105b2ad 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void)
 /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
 #define CPU_LOG_TB_OP_IND  (1 << 16)
 #define CPU_LOG_TB_FPU     (1 << 17)
+#define CPU_LOG_PLUGIN     (1 << 18)
 
 /* Lock output for a series of related logs.  Since this is not needed
  * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h
new file mode 100644
index 0000000000..fbbe99474b
--- /dev/null
+++ b/include/qemu/plugin-memory.h
@@ -0,0 +1,40 @@
+/*
+ * Plugin Memory API
+ *
+ * Copyright (c) 2019 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _PLUGIN_MEMORY_H_
+#define _PLUGIN_MEMORY_H_
+
+struct qemu_plugin_hwaddr {
+    bool is_io;
+    bool is_store;
+    union {
+        struct {
+            MemoryRegionSection *section;
+            hwaddr    offset;
+        } io;
+        struct {
+            uint64_t hostaddr;
+        } ram;
+    } v;
+};
+
+/**
+ * tlb_plugin_lookup: query last TLB lookup
+ * @cpu: cpu environment
+ *
+ * This function can be used directly after a memory operation to
+ * query information about the access. It is used by the plugin
+ * infrastructure to expose more information about the address.
+ *
+ * It would only fail if not called from an instrumented memory access
+ * which would be an abuse of the API.
+ */
+bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
+                       bool is_store, struct qemu_plugin_hwaddr *data);
+
+#endif /* _PLUGIN_MEMORY_H_ */
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
new file mode 100644
index 0000000000..11687e8cdc
--- /dev/null
+++ b/include/qemu/plugin.h
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_PLUGIN_H
+#define QEMU_PLUGIN_H
+
+#include "qemu/config-file.h"
+#include "qemu/qemu-plugin.h"
+#include "qemu/error-report.h"
+#include "qemu/queue.h"
+#include "qemu/option.h"
+
+/*
+ * Option parsing/processing.
+ * Note that we can load an arbitrary number of plugins.
+ */
+struct qemu_plugin_desc;
+typedef QTAILQ_HEAD(, qemu_plugin_desc) QemuPluginList;
+
+#ifdef CONFIG_PLUGIN
+extern QemuOptsList qemu_plugin_opts;
+
+static inline void qemu_plugin_add_opts(void)
+{
+    qemu_add_opts(&qemu_plugin_opts);
+}
+
+void qemu_plugin_opt_parse(const char *optarg, QemuPluginList *head);
+int qemu_plugin_load_list(QemuPluginList *head);
+#else /* !CONFIG_PLUGIN */
+static inline void qemu_plugin_add_opts(void)
+{ }
+
+static inline void qemu_plugin_opt_parse(const char *optarg,
+                                         QemuPluginList *head)
+{
+    error_report("plugin interface not enabled in this build");
+    exit(1);
+}
+
+static inline int qemu_plugin_load_list(QemuPluginList *head)
+{
+    return 0;
+}
+#endif /* !CONFIG_PLUGIN */
+
+/*
+ * Events that plugins can subscribe to.
+ */
+enum qemu_plugin_event {
+    QEMU_PLUGIN_EV_VCPU_INIT,
+    QEMU_PLUGIN_EV_VCPU_EXIT,
+    QEMU_PLUGIN_EV_VCPU_TB_TRANS,
+    QEMU_PLUGIN_EV_VCPU_IDLE,
+    QEMU_PLUGIN_EV_VCPU_RESUME,
+    QEMU_PLUGIN_EV_VCPU_SYSCALL,
+    QEMU_PLUGIN_EV_VCPU_SYSCALL_RET,
+    QEMU_PLUGIN_EV_FLUSH,
+    QEMU_PLUGIN_EV_ATEXIT,
+    QEMU_PLUGIN_EV_MAX, /* total number of plugin events we support */
+};
+
+union qemu_plugin_cb_sig {
+    qemu_plugin_simple_cb_t          simple;
+    qemu_plugin_udata_cb_t           udata;
+    qemu_plugin_vcpu_simple_cb_t     vcpu_simple;
+    qemu_plugin_vcpu_udata_cb_t      vcpu_udata;
+    qemu_plugin_vcpu_tb_trans_cb_t   vcpu_tb_trans;
+    qemu_plugin_vcpu_mem_cb_t        vcpu_mem;
+    qemu_plugin_vcpu_syscall_cb_t    vcpu_syscall;
+    qemu_plugin_vcpu_syscall_ret_cb_t vcpu_syscall_ret;
+    void *generic;
+};
+
+enum plugin_dyn_cb_type {
+    PLUGIN_CB_INSN,
+    PLUGIN_CB_MEM,
+    PLUGIN_N_CB_TYPES,
+};
+
+enum plugin_dyn_cb_subtype {
+    PLUGIN_CB_REGULAR,
+    PLUGIN_CB_INLINE,
+    PLUGIN_N_CB_SUBTYPES,
+};
+
+/*
+ * A dynamic callback has an insertion point that is determined at run-time.
+ * Usually the insertion point is somewhere in the code cache; think for
+ * instance of a callback to be called upon the execution of a particular TB.
+ */
+struct qemu_plugin_dyn_cb {
+    union qemu_plugin_cb_sig f;
+    void *userp;
+    unsigned tcg_flags;
+    enum plugin_dyn_cb_subtype type;
+    /* @rw applies to mem callbacks only (both regular and inline) */
+    enum qemu_plugin_mem_rw rw;
+    /* fields specific to each dyn_cb type go here */
+    union {
+        struct {
+            enum qemu_plugin_op op;
+            uint64_t imm;
+        } inline_insn;
+    };
+};
+
+struct qemu_plugin_insn {
+    GByteArray *data;
+    uint64_t vaddr;
+    void *haddr;
+    GArray *cbs[PLUGIN_N_CB_TYPES][PLUGIN_N_CB_SUBTYPES];
+    bool calls_helpers;
+    bool mem_helper;
+};
+
+/*
+ * qemu_plugin_insn allocate and cleanup functions. We don't expect to
+ * cleanup many of these structures. They are reused for each fresh
+ * translation.
+ */
+
+static inline void qemu_plugin_insn_cleanup_fn(gpointer data)
+{
+    struct qemu_plugin_insn *insn = (struct qemu_plugin_insn *) data;
+    g_byte_array_free(insn->data, true);
+}
+
+static inline struct qemu_plugin_insn *qemu_plugin_insn_alloc(void)
+{
+    int i, j;
+    struct qemu_plugin_insn *insn = g_new0(struct qemu_plugin_insn, 1);
+    insn->data = g_byte_array_sized_new(4);
+
+    for (i = 0; i < PLUGIN_N_CB_TYPES; i++) {
+        for (j = 0; j < PLUGIN_N_CB_SUBTYPES; j++) {
+            insn->cbs[i][j] = g_array_new(false, false,
+                                          sizeof(struct qemu_plugin_dyn_cb));
+        }
+    }
+    return insn;
+}
+
+struct qemu_plugin_tb {
+    GPtrArray *insns;
+    size_t n;
+    uint64_t vaddr;
+    uint64_t vaddr2;
+    void *haddr1;
+    void *haddr2;
+    GArray *cbs[PLUGIN_N_CB_SUBTYPES];
+};
+
+/**
+ * qemu_plugin_tb_insn_get(): get next plugin record for translation.
+ *
+ */
+static inline
+struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb)
+{
+    struct qemu_plugin_insn *insn;
+    int i, j;
+
+    if (unlikely(tb->n == tb->insns->len)) {
+        struct qemu_plugin_insn *new_insn = qemu_plugin_insn_alloc();
+        g_ptr_array_add(tb->insns, new_insn);
+    }
+    insn = g_ptr_array_index(tb->insns, tb->n++);
+    g_byte_array_set_size(insn->data, 0);
+    insn->calls_helpers = false;
+    insn->mem_helper = false;
+
+    for (i = 0; i < PLUGIN_N_CB_TYPES; i++) {
+        for (j = 0; j < PLUGIN_N_CB_SUBTYPES; j++) {
+            g_array_set_size(insn->cbs[i][j], 0);
+        }
+    }
+
+    return insn;
+}
+
+#ifdef CONFIG_PLUGIN
+
+void qemu_plugin_vcpu_init_hook(CPUState *cpu);
+void qemu_plugin_vcpu_exit_hook(CPUState *cpu);
+void qemu_plugin_tb_trans_cb(CPUState *cpu, struct qemu_plugin_tb *tb);
+void qemu_plugin_vcpu_idle_cb(CPUState *cpu);
+void qemu_plugin_vcpu_resume_cb(CPUState *cpu);
+void
+qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1,
+                         uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5,
+                         uint64_t a6, uint64_t a7, uint64_t a8);
+void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret);
+
+void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, uint32_t meminfo);
+
+void qemu_plugin_flush_cb(void);
+
+void qemu_plugin_atexit_cb(void);
+
+void qemu_plugin_add_dyn_cb_arr(GArray *arr);
+
+void qemu_plugin_disable_mem_helpers(CPUState *cpu);
+
+#else /* !CONFIG_PLUGIN */
+
+static inline void qemu_plugin_vcpu_init_hook(CPUState *cpu)
+{ }
+
+static inline void qemu_plugin_vcpu_exit_hook(CPUState *cpu)
+{ }
+
+static inline void qemu_plugin_tb_trans_cb(CPUState *cpu,
+                                           struct qemu_plugin_tb *tb)
+{ }
+
+static inline void qemu_plugin_vcpu_idle_cb(CPUState *cpu)
+{ }
+
+static inline void qemu_plugin_vcpu_resume_cb(CPUState *cpu)
+{ }
+
+static inline void
+qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1, uint64_t a2,
+                         uint64_t a3, uint64_t a4, uint64_t a5, uint64_t a6,
+                         uint64_t a7, uint64_t a8)
+{ }
+
+static inline
+void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret)
+{ }
+
+static inline void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
+                                           uint32_t meminfo)
+{ }
+
+static inline void qemu_plugin_flush_cb(void)
+{ }
+
+static inline void qemu_plugin_atexit_cb(void)
+{ }
+
+static inline
+void qemu_plugin_add_dyn_cb_arr(GArray *arr)
+{ }
+
+static inline void qemu_plugin_disable_mem_helpers(CPUState *cpu)
+{ }
+
+#endif /* !CONFIG_PLUGIN */
+
+#endif /* QEMU_PLUGIN_H */
diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
new file mode 100644
index 0000000000..a00a7deb46
--- /dev/null
+++ b/include/qemu/qemu-plugin.h
@@ -0,0 +1,393 @@
+/*
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019, Linaro
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef QEMU_PLUGIN_API_H
+#define QEMU_PLUGIN_API_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+/*
+ * For best performance, build the plugin with -fvisibility=hidden so that
+ * QEMU_PLUGIN_LOCAL is implicit. Then, just mark qemu_plugin_install with
+ * QEMU_PLUGIN_EXPORT. For more info, see
+ *   https://gcc.gnu.org/wiki/Visibility
+ */
+#if defined _WIN32 || defined __CYGWIN__
+  #ifdef BUILDING_DLL
+    #define QEMU_PLUGIN_EXPORT __declspec(dllexport)
+  #else
+    #define QEMU_PLUGIN_EXPORT __declspec(dllimport)
+  #endif
+  #define QEMU_PLUGIN_LOCAL
+#else
+  #if __GNUC__ >= 4
+    #define QEMU_PLUGIN_EXPORT __attribute__((visibility("default")))
+    #define QEMU_PLUGIN_LOCAL  __attribute__((visibility("hidden")))
+  #else
+    #define QEMU_PLUGIN_EXPORT
+    #define QEMU_PLUGIN_LOCAL
+  #endif
+#endif
+
+typedef uint64_t qemu_plugin_id_t;
+
+typedef struct {
+    /* string describing architecture */
+    const char *target_name;
+    /* is this a full system emulation? */
+    bool system_emulation;
+    union {
+        /*
+         * smp_vcpus may change if vCPUs can be hot-plugged, max_vcpus
+         * is the system-wide limit.
+         */
+        struct {
+            int smp_vcpus;
+            int max_vcpus;
+        } system;
+    };
+} qemu_info_t;
+
+/**
+ * qemu_plugin_install() - Install a plugin
+ * @id: this plugin's opaque ID
+ * @info: a block describing some details about the guest
+ * @argc: number of arguments
+ * @argv: array of arguments (@argc elements)
+ *
+ * All plugins must export this symbol.
+ *
+ * Note: Calling qemu_plugin_uninstall() from this function is a bug. To raise
+ * an error during install, return !0.
+ *
+ * Note: @info is only live during the call. Copy any information we
+ * want to keep.
+ *
+ * Note: @argv remains valid throughout the lifetime of the loaded plugin.
+ */
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info,
+                                           int argc, char **argv);
+
+/*
+ * Prototypes for the various callback styles we will be registering
+ * in the following functions.
+ */
+typedef void (*qemu_plugin_simple_cb_t)(qemu_plugin_id_t id);
+
+typedef void (*qemu_plugin_udata_cb_t)(qemu_plugin_id_t id, void *userdata);
+
+typedef void (*qemu_plugin_vcpu_simple_cb_t)(qemu_plugin_id_t id,
+                                             unsigned int vcpu_index);
+
+typedef void (*qemu_plugin_vcpu_udata_cb_t)(unsigned int vcpu_index,
+                                            void *userdata);
+
+/**
+ * qemu_plugin_uninstall() - Uninstall a plugin
+ * @id: this plugin's opaque ID
+ * @cb: callback to be called once the plugin has been removed
+ *
+ * Do NOT assume that the plugin has been uninstalled once this function
+ * returns. Plugins are uninstalled asynchronously, and therefore the given
+ * plugin receives callbacks until @cb is called.
+ *
+ * Note: Calling this function from qemu_plugin_install() is a bug.
+ */
+void qemu_plugin_uninstall(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb);
+
+/**
+ * qemu_plugin_reset() - Reset a plugin
+ * @id: this plugin's opaque ID
+ * @cb: callback to be called once the plugin has been reset
+ *
+ * Unregisters all callbacks for the plugin given by @id.
+ *
+ * Do NOT assume that the plugin has been reset once this function returns.
+ * Plugins are reset asynchronously, and therefore the given plugin receives
+ * callbacks until @cb is called.
+ */
+void qemu_plugin_reset(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb);
+
+/**
+ * qemu_plugin_register_vcpu_init_cb() - register a vCPU initialization callback
+ * @id: plugin ID
+ * @cb: callback function
+ *
+ * The @cb function is called every time a vCPU is initialized.
+ *
+ * See also: qemu_plugin_register_vcpu_exit_cb()
+ */
+void qemu_plugin_register_vcpu_init_cb(qemu_plugin_id_t id,
+                                       qemu_plugin_vcpu_simple_cb_t cb);
+
+/**
+ * qemu_plugin_register_vcpu_exit_cb() - register a vCPU exit callback
+ * @id: plugin ID
+ * @cb: callback function
+ *
+ * The @cb function is called every time a vCPU exits.
+ *
+ * See also: qemu_plugin_register_vcpu_init_cb()
+ */
+void qemu_plugin_register_vcpu_exit_cb(qemu_plugin_id_t id,
+                                       qemu_plugin_vcpu_simple_cb_t cb);
+
+/**
+ * qemu_plugin_register_vcpu_idle_cb() - register a vCPU idle callback
+ * @id: plugin ID
+ * @cb: callback function
+ *
+ * The @cb function is called every time a vCPU idles.
+ */
+void qemu_plugin_register_vcpu_idle_cb(qemu_plugin_id_t id,
+                                       qemu_plugin_vcpu_simple_cb_t cb);
+
+/**
+ * qemu_plugin_register_vcpu_resume_cb() - register a vCPU resume callback
+ * @id: plugin ID
+ * @cb: callback function
+ *
+ * The @cb function is called every time a vCPU resumes execution.
+ */
+void qemu_plugin_register_vcpu_resume_cb(qemu_plugin_id_t id,
+                                         qemu_plugin_vcpu_simple_cb_t cb);
+
+/*
+ * Opaque types that the plugin is given during the translation and
+ * instrumentation phase.
+ */
+struct qemu_plugin_tb;
+struct qemu_plugin_insn;
+
+enum qemu_plugin_cb_flags {
+    QEMU_PLUGIN_CB_NO_REGS, /* callback does not access the CPU's regs */
+    QEMU_PLUGIN_CB_R_REGS,  /* callback reads the CPU's regs */
+    QEMU_PLUGIN_CB_RW_REGS, /* callback reads and writes the CPU's regs */
+};
+
+enum qemu_plugin_mem_rw {
+    QEMU_PLUGIN_MEM_R = 1,
+    QEMU_PLUGIN_MEM_W,
+    QEMU_PLUGIN_MEM_RW,
+};
+
+/**
+ * qemu_plugin_register_vcpu_tb_trans_cb() - register a translate cb
+ * @id: plugin ID
+ * @cb: callback function
+ *
+ * The @cb function is called every time a translation occurs. The @cb
+ * function is passed an opaque qemu_plugin_type which it can query
+ * for additional information including the list of translated
+ * instructions. At this point the plugin can register further
+ * callbacks to be triggered when the block or individual instruction
+ * executes.
+ */
+typedef void (*qemu_plugin_vcpu_tb_trans_cb_t)(qemu_plugin_id_t id,
+                                               struct qemu_plugin_tb *tb);
+
+void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id,
+                                           qemu_plugin_vcpu_tb_trans_cb_t cb);
+
+/**
+ * qemu_plugin_register_vcpu_tb_trans_exec_cb() - register execution callback
+ * @tb: the opaque qemu_plugin_tb handle for the translation
+ * @cb: callback function
+ * @flags: does the plugin read or write the CPU's registers?
+ * @userdata: any plugin data to pass to the @cb?
+ *
+ * The @cb function is called every time a translated unit executes.
+ */
+void qemu_plugin_register_vcpu_tb_exec_cb(struct qemu_plugin_tb *tb,
+                                          qemu_plugin_vcpu_udata_cb_t cb,
+                                          enum qemu_plugin_cb_flags flags,
+                                          void *userdata);
+
+enum qemu_plugin_op {
+    QEMU_PLUGIN_INLINE_ADD_U64,
+};
+
+/**
+ * qemu_plugin_register_vcpu_tb_trans_exec_inline() - execution inline op
+ * @tb: the opaque qemu_plugin_tb handle for the translation
+ * @op: the type of qemu_plugin_op (e.g. ADD_U64)
+ * @ptr: the target memory location for the op
+ * @imm: the op data (e.g. 1)
+ *
+ * Insert an inline op to every time a translated unit executes.
+ * Useful if you just want to increment a single counter somewhere in
+ * memory.
+ */
+void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
+                                              enum qemu_plugin_op op,
+                                              void *ptr, uint64_t imm);
+
+/**
+ * qemu_plugin_register_vcpu_insn_exec_cb() - register insn execution cb
+ * @insn: the opaque qemu_plugin_insn handle for an instruction
+ * @cb: callback function
+ * @flags: does the plugin read or write the CPU's registers?
+ * @userdata: any plugin data to pass to the @cb?
+ *
+ * The @cb function is called every time an instruction is executed
+ */
+void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
+                                            qemu_plugin_vcpu_udata_cb_t cb,
+                                            enum qemu_plugin_cb_flags flags,
+                                            void *userdata);
+
+/**
+ * qemu_plugin_register_vcpu_insn_exec_inline() - insn execution inline op
+ * @insn: the opaque qemu_plugin_insn handle for an instruction
+ * @cb: callback function
+ * @op: the type of qemu_plugin_op (e.g. ADD_U64)
+ * @ptr: the target memory location for the op
+ * @imm: the op data (e.g. 1)
+ *
+ * Insert an inline op to every time an instruction executes. Useful
+ * if you just want to increment a single counter somewhere in memory.
+ */
+void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
+                                                enum qemu_plugin_op op,
+                                                void *ptr, uint64_t imm);
+
+/*
+ * Helpers to query information about the instructions in a block
+ */
+size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb);
+
+uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb);
+
+struct qemu_plugin_insn *
+qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx);
+
+const void *qemu_plugin_insn_data(const struct qemu_plugin_insn *insn);
+
+size_t qemu_plugin_insn_size(const struct qemu_plugin_insn *insn);
+
+uint64_t qemu_plugin_insn_vaddr(const struct qemu_plugin_insn *insn);
+void *qemu_plugin_insn_haddr(const struct qemu_plugin_insn *insn);
+
+/*
+ * Memory Instrumentation
+ *
+ * The anonymous qemu_plugin_meminfo_t and qemu_plugin_hwaddr types
+ * can be used in queries to QEMU to get more information about a
+ * given memory access.
+ */
+typedef uint32_t qemu_plugin_meminfo_t;
+struct qemu_plugin_hwaddr;
+
+/* meminfo queries */
+unsigned int qemu_plugin_mem_size_shift(qemu_plugin_meminfo_t info);
+bool qemu_plugin_mem_is_sign_extended(qemu_plugin_meminfo_t info);
+bool qemu_plugin_mem_is_big_endian(qemu_plugin_meminfo_t info);
+bool qemu_plugin_mem_is_store(qemu_plugin_meminfo_t info);
+
+/*
+ * qemu_plugin_get_hwaddr():
+ * @vaddr: the virtual address of the memory operation
+ *
+ * For system emulation returns a qemu_plugin_hwaddr handle to query
+ * details about the actual physical address backing the virtual
+ * address. For linux-user guests it just returns NULL.
+ *
+ * This handle is *only* valid for the duration of the callback. Any
+ * information about the handle should be recovered before the
+ * callback returns.
+ */
+struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
+                                                  uint64_t vaddr);
+
+/*
+ * The following additional queries can be run on the hwaddr structure
+ * to return information about it. For non-IO accesses the device
+ * offset will be into the appropriate block of RAM.
+ */
+bool qemu_plugin_hwaddr_is_io(struct qemu_plugin_hwaddr *hwaddr);
+uint64_t qemu_plugin_hwaddr_device_offset(const struct qemu_plugin_hwaddr *haddr);
+
+typedef void
+(*qemu_plugin_vcpu_mem_cb_t)(unsigned int vcpu_index,
+                             qemu_plugin_meminfo_t info, uint64_t vaddr,
+                             void *userdata);
+
+void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn,
+                                      qemu_plugin_vcpu_mem_cb_t cb,
+                                      enum qemu_plugin_cb_flags flags,
+                                      enum qemu_plugin_mem_rw rw,
+                                      void *userdata);
+
+void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
+                                          enum qemu_plugin_mem_rw rw,
+                                          enum qemu_plugin_op op, void *ptr,
+                                          uint64_t imm);
+
+
+
+typedef void
+(*qemu_plugin_vcpu_syscall_cb_t)(qemu_plugin_id_t id, unsigned int vcpu_index,
+                                 int64_t num, uint64_t a1, uint64_t a2,
+                                 uint64_t a3, uint64_t a4, uint64_t a5,
+                                 uint64_t a6, uint64_t a7, uint64_t a8);
+
+void qemu_plugin_register_vcpu_syscall_cb(qemu_plugin_id_t id,
+                                          qemu_plugin_vcpu_syscall_cb_t cb);
+
+typedef void
+(*qemu_plugin_vcpu_syscall_ret_cb_t)(qemu_plugin_id_t id, unsigned int vcpu_idx,
+                                     int64_t num, int64_t ret);
+
+void
+qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id,
+                                         qemu_plugin_vcpu_syscall_ret_cb_t cb);
+
+
+/**
+ * qemu_plugin_insn_disas() - return disassembly string for instruction
+ * @insn: instruction reference
+ *
+ * Returns an allocated string containing the disassembly
+ */
+
+char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn);
+
+/**
+ * qemu_plugin_vcpu_for_each() - iterate over the existing vCPU
+ * @id: plugin ID
+ * @cb: callback function
+ *
+ * The @cb function is called once for each existing vCPU.
+ *
+ * See also: qemu_plugin_register_vcpu_init_cb()
+ */
+void qemu_plugin_vcpu_for_each(qemu_plugin_id_t id,
+                               qemu_plugin_vcpu_simple_cb_t cb);
+
+void qemu_plugin_register_flush_cb(qemu_plugin_id_t id,
+                                   qemu_plugin_simple_cb_t cb);
+
+void qemu_plugin_register_atexit_cb(qemu_plugin_id_t id,
+                                    qemu_plugin_udata_cb_t cb, void *userdata);
+
+/* returns -1 in user-mode */
+int qemu_plugin_n_vcpus(void);
+
+/* returns -1 in user-mode */
+int qemu_plugin_n_max_vcpus(void);
+
+/**
+ * qemu_plugin_outs() - output string via QEMU's logging system
+ * @string: a string
+ */
+void qemu_plugin_outs(const char *string);
+
+#endif /* QEMU_PLUGIN_API_H */
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index 73bf4a984d..4764d93ea3 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -420,6 +420,16 @@ union {                                                                 \
         (elm)->field.tqe_circ.tql_prev = NULL;                          \
 } while (/*CONSTCOND*/0)
 
+/* remove @left, @right and all elements in between from @head */
+#define QTAILQ_REMOVE_SEVERAL(head, left, right, field) do {            \
+        if (((right)->field.tqe_next) != NULL)                          \
+            (right)->field.tqe_next->field.tqe_circ.tql_prev =          \
+                (left)->field.tqe_circ.tql_prev;                        \
+        else                                                            \
+            (head)->tqh_circ.tql_prev = (left)->field.tqe_circ.tql_prev; \
+        (left)->field.tqe_circ.tql_prev->tql_next = (right)->field.tqe_next; \
+    } while (/*CONSTCOND*/0)
+
 #define QTAILQ_FOREACH(var, head, field)                                \
         for ((var) = ((head)->tqh_first);                               \
                 (var);                                                  \
diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h
new file mode 100644
index 0000000000..9e60473643
--- /dev/null
+++ b/include/user/syscall-trace.h
@@ -0,0 +1,40 @@
+/*
+ * Common System Call Tracing Wrappers for *-user
+ *
+ * Copyright (c) 2019 Linaro
+ * Written by Alex Bennée <alex.bennee@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _SYSCALL_TRACE_H_
+#define _SYSCALL_TRACE_H_
+
+/*
+ * These helpers just provide a common place for the various
+ * subsystems that want to track syscalls to put their hooks in. We
+ * could potentially unify the -strace code here as well.
+ */
+
+static inline void record_syscall_start(void *cpu, int num,
+                                        abi_long arg1, abi_long arg2,
+                                        abi_long arg3, abi_long arg4,
+                                        abi_long arg5, abi_long arg6,
+                                        abi_long arg7, abi_long arg8)
+{
+    trace_guest_user_syscall(cpu, num,
+                             arg1, arg2, arg3, arg4,
+                             arg5, arg6, arg7, arg8);
+    qemu_plugin_vcpu_syscall(cpu, num,
+                             arg1, arg2, arg3, arg4,
+                             arg5, arg6, arg7, arg8);
+}
+
+static inline void record_syscall_return(void *cpu, int num, abi_long ret)
+{
+    trace_guest_user_syscall_ret(cpu, num, ret);
+    qemu_plugin_vcpu_syscall_ret(cpu, num, ret);
+}
+
+
+#endif /* _SYSCALL_TRACE_H_ */
diff --git a/linux-user/exit.c b/linux-user/exit.c
index bdda720553..a362ef67d2 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -35,4 +35,5 @@ void preexit_cleanup(CPUArchState *env, int code)
         __gcov_dump();
 #endif
         gdb_exit(env, code);
+        qemu_plugin_atexit_cb();
 }
diff --git a/linux-user/main.c b/linux-user/main.c
index 560d053f72..6ff7851e86 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -34,6 +34,7 @@
 #include "qemu/error-report.h"
 #include "qemu/help_option.h"
 #include "qemu/module.h"
+#include "qemu/plugin.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "tcg.h"
@@ -398,6 +399,15 @@ static void handle_arg_abi_call0(const char *arg)
 }
 #endif
 
+static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
+
+#ifdef CONFIG_PLUGIN
+static void handle_arg_plugin(const char *arg)
+{
+    qemu_plugin_opt_parse(arg, &plugins);
+}
+#endif
+
 struct qemu_argument {
     const char *argv;
     const char *env;
@@ -449,6 +459,10 @@ static const struct qemu_argument arg_table[] = {
      "",           "Seed for pseudo-random number generator"},
     {"trace",      "QEMU_TRACE",       true,  handle_arg_trace,
      "",           "[[enable=]<pattern>][,events=<file>][,file=<file>]"},
+#ifdef CONFIG_PLUGIN
+    {"plugin",     "QEMU_PLUGIN",      true,  handle_arg_plugin,
+     "",           "[file=]<file>[,arg=<string>]"},
+#endif
     {"version",    "QEMU_VERSION",     false, handle_arg_version,
      "",           "display version information and exit"},
 #if defined(TARGET_XTENSA)
@@ -643,6 +657,7 @@ int main(int argc, char **argv, char **envp)
     cpu_model = NULL;
 
     qemu_add_opts(&qemu_trace_opts);
+    qemu_plugin_add_opts();
 
     optind = parse_args(argc, argv);
 
@@ -650,6 +665,9 @@ int main(int argc, char **argv, char **envp)
         exit(1);
     }
     trace_init_file(trace_file);
+    if (qemu_plugin_load_list(&plugins)) {
+        exit(1);
+    }
 
     /* Zero out regs */
     memset(regs, 0, sizeof(struct target_pt_regs));
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 530c843303..f6751eecb7 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -112,6 +112,7 @@
 
 #include "qemu.h"
 #include "qemu/guest-random.h"
+#include "user/syscall-trace.h"
 #include "qapi/error.h"
 #include "fd-trans.h"
 
@@ -11984,8 +11985,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     }
 #endif
 
-    trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4,
-                             arg5, arg6, arg7, arg8);
+    record_syscall_start(cpu, num, arg1,
+                         arg2, arg3, arg4, arg5, arg6, arg7, arg8);
 
     if (unlikely(do_strace)) {
         print_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6);
@@ -11997,6 +11998,6 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                           arg5, arg6, arg7, arg8);
     }
 
-    trace_guest_user_syscall_ret(cpu, num, ret);
+    record_syscall_return(cpu, num, ret);
     return ret;
 }
diff --git a/plugins/.gitignore b/plugins/.gitignore
new file mode 100644
index 0000000000..7b8aaa1f10
--- /dev/null
+++ b/plugins/.gitignore
@@ -0,0 +1,2 @@
+qemu-plugins-ld.symbols
+qemu-plugins-ld64.symbols
diff --git a/plugins/Makefile.objs b/plugins/Makefile.objs
new file mode 100644
index 0000000000..6f14d91ccb
--- /dev/null
+++ b/plugins/Makefile.objs
@@ -0,0 +1,21 @@
+#
+# Plugin Support
+#
+
+obj-y += loader.o
+obj-y += core.o
+obj-y += api.o
+
+# Abuse -libs suffix to only link with --dynamic-list/-exported_symbols_list
+# when the final binary includes the plugin object.
+#
+# Note that simply setting LDFLAGS is not enough: we build binaries that
+# never link plugin.o, and the linker might fail (at least ld64 does)
+# if the symbols in the list are not in the output binary.
+ifdef CONFIG_HAS_LD_DYNAMIC_LIST
+api.o-libs := -Wl,--dynamic-list=$(BUILD_DIR)/qemu-plugins-ld.symbols
+else
+ifdef CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST
+api.o-libs := -Wl,-exported_symbols_list,$(BUILD_DIR)/qemu-plugins-ld64.symbols
+endif
+endif
diff --git a/plugins/api.c b/plugins/api.c
new file mode 100644
index 0000000000..fa1d9f276d
--- /dev/null
+++ b/plugins/api.c
@@ -0,0 +1,341 @@
+/*
+ * QEMU Plugin API
+ *
+ * This provides the API that is available to the plugins to interact
+ * with QEMU. We have to be careful not to expose internal details of
+ * how QEMU works so we abstract out things like translation and
+ * instructions to anonymous data types:
+ *
+ *  qemu_plugin_tb
+ *  qemu_plugin_insn
+ *
+ * Which can then be passed back into the API to do additional things.
+ * As such all the public functions in here are exported in
+ * qemu-plugin.h.
+ *
+ * The general life-cycle of a plugin is:
+ *
+ *  - plugin is loaded, public qemu_plugin_install called
+ *    - the install func registers callbacks for events
+ *    - usually an atexit_cb is registered to dump info at the end
+ *  - when a registered event occurs the plugin is called
+ *     - some events pass additional info
+ *     - during translation the plugin can decide to instrument any
+ *       instruction
+ *  - when QEMU exits all the registered atexit callbacks are called
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019, Linaro
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/plugin.h"
+#include "cpu.h"
+#include "sysemu/sysemu.h"
+#include "tcg/tcg.h"
+#include "exec/exec-all.h"
+#include "disas/disas.h"
+#include "plugin.h"
+#ifndef CONFIG_USER_ONLY
+#include "qemu/plugin-memory.h"
+#include "hw/boards.h"
+#endif
+
+/* Uninstall and Reset handlers */
+
+void qemu_plugin_uninstall(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb)
+{
+    plugin_reset_uninstall(id, cb, false);
+}
+
+void qemu_plugin_reset(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb)
+{
+    plugin_reset_uninstall(id, cb, true);
+}
+
+/*
+ * Plugin Register Functions
+ *
+ * This allows the plugin to register callbacks for various events
+ * during the translation.
+ */
+
+void qemu_plugin_register_vcpu_init_cb(qemu_plugin_id_t id,
+                                       qemu_plugin_vcpu_simple_cb_t cb)
+{
+    plugin_register_cb(id, QEMU_PLUGIN_EV_VCPU_INIT, cb);
+}
+
+void qemu_plugin_register_vcpu_exit_cb(qemu_plugin_id_t id,
+                                       qemu_plugin_vcpu_simple_cb_t cb)
+{
+    plugin_register_cb(id, QEMU_PLUGIN_EV_VCPU_EXIT, cb);
+}
+
+void qemu_plugin_register_vcpu_tb_exec_cb(struct qemu_plugin_tb *tb,
+                                          qemu_plugin_vcpu_udata_cb_t cb,
+                                          enum qemu_plugin_cb_flags flags,
+                                          void *udata)
+{
+    plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR],
+                                  cb, flags, udata);
+}
+
+void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
+                                              enum qemu_plugin_op op,
+                                              void *ptr, uint64_t imm)
+{
+    plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
+}
+
+void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
+                                            qemu_plugin_vcpu_udata_cb_t cb,
+                                            enum qemu_plugin_cb_flags flags,
+                                            void *udata)
+{
+    plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR],
+        cb, flags, udata);
+}
+
+void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
+                                                enum qemu_plugin_op op,
+                                                void *ptr, uint64_t imm)
+{
+    plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
+                              0, op, ptr, imm);
+}
+
+
+
+void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn,
+                                      qemu_plugin_vcpu_mem_cb_t cb,
+                                      enum qemu_plugin_cb_flags flags,
+                                      enum qemu_plugin_mem_rw rw,
+                                      void *udata)
+{
+    plugin_register_vcpu_mem_cb(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR],
+                                cb, flags, rw, udata);
+}
+
+void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
+                                          enum qemu_plugin_mem_rw rw,
+                                          enum qemu_plugin_op op, void *ptr,
+                                          uint64_t imm)
+{
+    plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE],
+        rw, op, ptr, imm);
+}
+
+void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id,
+                                           qemu_plugin_vcpu_tb_trans_cb_t cb)
+{
+    plugin_register_cb(id, QEMU_PLUGIN_EV_VCPU_TB_TRANS, cb);
+}
+
+void qemu_plugin_register_vcpu_syscall_cb(qemu_plugin_id_t id,
+                                          qemu_plugin_vcpu_syscall_cb_t cb)
+{
+    plugin_register_cb(id, QEMU_PLUGIN_EV_VCPU_SYSCALL, cb);
+}
+
+void
+qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id,
+                                         qemu_plugin_vcpu_syscall_ret_cb_t cb)
+{
+    plugin_register_cb(id, QEMU_PLUGIN_EV_VCPU_SYSCALL_RET, cb);
+}
+
+/*
+ * Plugin Queries
+ *
+ * These are queries that the plugin can make to gauge information
+ * from our opaque data types. We do not want to leak internal details
+ * here just information useful to the plugin.
+ */
+
+/*
+ * Translation block information:
+ *
+ * A plugin can query the virtual address of the start of the block
+ * and the number of instructions in it. It can also get access to
+ * each translated instruction.
+ */
+
+size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb)
+{
+    return tb->n;
+}
+
+uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb)
+{
+    return tb->vaddr;
+}
+
+struct qemu_plugin_insn *
+qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx)
+{
+    if (unlikely(idx >= tb->n)) {
+        return NULL;
+    }
+    return g_ptr_array_index(tb->insns, idx);
+}
+
+/*
+ * Instruction information
+ *
+ * These queries allow the plugin to retrieve information about each
+ * instruction being translated.
+ */
+
+const void *qemu_plugin_insn_data(const struct qemu_plugin_insn *insn)
+{
+    return insn->data->data;
+}
+
+size_t qemu_plugin_insn_size(const struct qemu_plugin_insn *insn)
+{
+    return insn->data->len;
+}
+
+uint64_t qemu_plugin_insn_vaddr(const struct qemu_plugin_insn *insn)
+{
+    return insn->vaddr;
+}
+
+void *qemu_plugin_insn_haddr(const struct qemu_plugin_insn *insn)
+{
+    return insn->haddr;
+}
+
+char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn)
+{
+    CPUState *cpu = current_cpu;
+    return plugin_disas(cpu, insn->vaddr, insn->data->len);
+}
+
+/*
+ * The memory queries allow the plugin to query information about a
+ * memory access.
+ */
+
+unsigned qemu_plugin_mem_size_shift(qemu_plugin_meminfo_t info)
+{
+    return info & TRACE_MEM_SZ_SHIFT_MASK;
+}
+
+bool qemu_plugin_mem_is_sign_extended(qemu_plugin_meminfo_t info)
+{
+    return !!(info & TRACE_MEM_SE);
+}
+
+bool qemu_plugin_mem_is_big_endian(qemu_plugin_meminfo_t info)
+{
+    return !!(info & TRACE_MEM_BE);
+}
+
+bool qemu_plugin_mem_is_store(qemu_plugin_meminfo_t info)
+{
+    return !!(info & TRACE_MEM_ST);
+}
+
+/*
+ * Virtual Memory queries
+ */
+
+#ifdef CONFIG_SOFTMMU
+static __thread struct qemu_plugin_hwaddr hwaddr_info;
+
+struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
+                                                  uint64_t vaddr)
+{
+    CPUState *cpu = current_cpu;
+    unsigned int mmu_idx = info >> TRACE_MEM_MMU_SHIFT;
+    hwaddr_info.is_store = info & TRACE_MEM_ST;
+
+    if (!tlb_plugin_lookup(cpu, vaddr, mmu_idx,
+                           info & TRACE_MEM_ST, &hwaddr_info)) {
+        error_report("invalid use of qemu_plugin_get_hwaddr");
+        return NULL;
+    }
+
+    return &hwaddr_info;
+}
+#else
+struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
+                                                  uint64_t vaddr)
+{
+    return NULL;
+}
+#endif
+
+bool qemu_plugin_hwaddr_is_io(struct qemu_plugin_hwaddr *hwaddr)
+{
+#ifdef CONFIG_SOFTMMU
+    return hwaddr->is_io;
+#else
+    return false;
+#endif
+}
+
+uint64_t qemu_plugin_hwaddr_device_offset(const struct qemu_plugin_hwaddr *haddr)
+{
+#ifdef CONFIG_SOFTMMU
+    if (haddr) {
+        if (!haddr->is_io) {
+            ram_addr_t ram_addr = qemu_ram_addr_from_host((void *) haddr->v.ram.hostaddr);
+            if (ram_addr == RAM_ADDR_INVALID) {
+                error_report("Bad ram pointer %"PRIx64"", haddr->v.ram.hostaddr);
+                abort();
+            }
+            return ram_addr;
+        } else {
+            return haddr->v.io.offset;
+        }
+    }
+#endif
+    return 0;
+}
+
+/*
+ * Queries to the number and potential maximum number of vCPUs there
+ * will be. This helps the plugin dimension per-vcpu arrays.
+ */
+
+#ifndef CONFIG_USER_ONLY
+static MachineState * get_ms(void)
+{
+    return MACHINE(qdev_get_machine());
+}
+#endif
+
+int qemu_plugin_n_vcpus(void)
+{
+#ifdef CONFIG_USER_ONLY
+    return -1;
+#else
+    return get_ms()->smp.cpus;
+#endif
+}
+
+int qemu_plugin_n_max_vcpus(void)
+{
+#ifdef CONFIG_USER_ONLY
+    return -1;
+#else
+    return get_ms()->smp.max_cpus;
+#endif
+}
+
+/*
+ * Plugin output
+ */
+void qemu_plugin_outs(const char *string)
+{
+    qemu_log_mask(CPU_LOG_PLUGIN, "%s", string);
+}
diff --git a/plugins/core.c b/plugins/core.c
new file mode 100644
index 0000000000..9e1b9e7a91
--- /dev/null
+++ b/plugins/core.c
@@ -0,0 +1,502 @@
+/*
+ * QEMU Plugin Core code
+ *
+ * This is the core code that deals with injecting instrumentation into the code
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019, Linaro
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/config-file.h"
+#include "qapi/error.h"
+#include "qemu/option.h"
+#include "qemu/rcu_queue.h"
+#include "qemu/xxhash.h"
+#include "qemu/rcu.h"
+#include "hw/core/cpu.h"
+#include "exec/cpu-common.h"
+
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "sysemu/sysemu.h"
+#include "tcg/tcg.h"
+#include "tcg/tcg-op.h"
+#include "trace/mem-internal.h" /* mem_info macros */
+#include "plugin.h"
+
+struct qemu_plugin_cb {
+    struct qemu_plugin_ctx *ctx;
+    union qemu_plugin_cb_sig f;
+    void *udata;
+    QLIST_ENTRY(qemu_plugin_cb) entry;
+};
+
+struct qemu_plugin_state plugin;
+
+struct qemu_plugin_ctx *plugin_id_to_ctx_locked(qemu_plugin_id_t id)
+{
+    struct qemu_plugin_ctx *ctx;
+    qemu_plugin_id_t *id_p;
+
+    id_p = g_hash_table_lookup(plugin.id_ht, &id);
+    ctx = container_of(id_p, struct qemu_plugin_ctx, id);
+    if (ctx == NULL) {
+        error_report("plugin: invalid plugin id %" PRIu64, id);
+        abort();
+    }
+    return ctx;
+}
+
+static void plugin_cpu_update__async(CPUState *cpu, run_on_cpu_data data)
+{
+    bitmap_copy(cpu->plugin_mask, &data.host_ulong, QEMU_PLUGIN_EV_MAX);
+    cpu_tb_jmp_cache_clear(cpu);
+}
+
+static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
+{
+    CPUState *cpu = container_of(k, CPUState, cpu_index);
+    run_on_cpu_data mask = RUN_ON_CPU_HOST_ULONG(*plugin.mask);
+
+    if (cpu->created) {
+        async_run_on_cpu(cpu, plugin_cpu_update__async, mask);
+    } else {
+        plugin_cpu_update__async(cpu, mask);
+    }
+}
+
+void plugin_unregister_cb__locked(struct qemu_plugin_ctx *ctx,
+                                  enum qemu_plugin_event ev)
+{
+    struct qemu_plugin_cb *cb = ctx->callbacks[ev];
+
+    if (cb == NULL) {
+        return;
+    }
+    QLIST_REMOVE_RCU(cb, entry);
+    g_free(cb);
+    ctx->callbacks[ev] = NULL;
+    if (QLIST_EMPTY_RCU(&plugin.cb_lists[ev])) {
+        clear_bit(ev, plugin.mask);
+        g_hash_table_foreach(plugin.cpu_ht, plugin_cpu_update__locked, NULL);
+    }
+}
+
+static void plugin_vcpu_cb__simple(CPUState *cpu, enum qemu_plugin_event ev)
+{
+    struct qemu_plugin_cb *cb, *next;
+
+    switch (ev) {
+    case QEMU_PLUGIN_EV_VCPU_INIT:
+    case QEMU_PLUGIN_EV_VCPU_EXIT:
+    case QEMU_PLUGIN_EV_VCPU_IDLE:
+    case QEMU_PLUGIN_EV_VCPU_RESUME:
+        /* iterate safely; plugins might uninstall themselves at any time */
+        QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
+            qemu_plugin_vcpu_simple_cb_t func = cb->f.vcpu_simple;
+
+            func(cb->ctx->id, cpu->cpu_index);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void plugin_cb__simple(enum qemu_plugin_event ev)
+{
+    struct qemu_plugin_cb *cb, *next;
+
+    switch (ev) {
+    case QEMU_PLUGIN_EV_FLUSH:
+        QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
+            qemu_plugin_simple_cb_t func = cb->f.simple;
+
+            func(cb->ctx->id);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void plugin_cb__udata(enum qemu_plugin_event ev)
+{
+    struct qemu_plugin_cb *cb, *next;
+
+    switch (ev) {
+    case QEMU_PLUGIN_EV_ATEXIT:
+        QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
+            qemu_plugin_udata_cb_t func = cb->f.udata;
+
+            func(cb->ctx->id, cb->udata);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void
+do_plugin_register_cb(qemu_plugin_id_t id, enum qemu_plugin_event ev,
+                      void *func, void *udata)
+{
+    struct qemu_plugin_ctx *ctx;
+
+    qemu_rec_mutex_lock(&plugin.lock);
+    ctx = plugin_id_to_ctx_locked(id);
+    /* if the plugin is on its way out, ignore this request */
+    if (unlikely(ctx->uninstalling)) {
+        goto out_unlock;
+    }
+    if (func) {
+        struct qemu_plugin_cb *cb = ctx->callbacks[ev];
+
+        if (cb) {
+            cb->f.generic = func;
+            cb->udata = udata;
+        } else {
+            cb = g_new(struct qemu_plugin_cb, 1);
+            cb->ctx = ctx;
+            cb->f.generic = func;
+            cb->udata = udata;
+            ctx->callbacks[ev] = cb;
+            QLIST_INSERT_HEAD_RCU(&plugin.cb_lists[ev], cb, entry);
+            if (!test_bit(ev, plugin.mask)) {
+                set_bit(ev, plugin.mask);
+                g_hash_table_foreach(plugin.cpu_ht, plugin_cpu_update__locked,
+                                     NULL);
+            }
+        }
+    } else {
+        plugin_unregister_cb__locked(ctx, ev);
+    }
+ out_unlock:
+    qemu_rec_mutex_unlock(&plugin.lock);
+}
+
+void plugin_register_cb(qemu_plugin_id_t id, enum qemu_plugin_event ev,
+                        void *func)
+{
+    do_plugin_register_cb(id, ev, func, NULL);
+}
+
+void
+plugin_register_cb_udata(qemu_plugin_id_t id, enum qemu_plugin_event ev,
+                         void *func, void *udata)
+{
+    do_plugin_register_cb(id, ev, func, udata);
+}
+
+void qemu_plugin_vcpu_init_hook(CPUState *cpu)
+{
+    bool success;
+
+    qemu_rec_mutex_lock(&plugin.lock);
+    plugin_cpu_update__locked(&cpu->cpu_index, NULL, NULL);
+    success = g_hash_table_insert(plugin.cpu_ht, &cpu->cpu_index,
+                                  &cpu->cpu_index);
+    g_assert(success);
+    qemu_rec_mutex_unlock(&plugin.lock);
+
+    plugin_vcpu_cb__simple(cpu, QEMU_PLUGIN_EV_VCPU_INIT);
+}
+
+void qemu_plugin_vcpu_exit_hook(CPUState *cpu)
+{
+    bool success;
+
+    plugin_vcpu_cb__simple(cpu, QEMU_PLUGIN_EV_VCPU_EXIT);
+
+    qemu_rec_mutex_lock(&plugin.lock);
+    success = g_hash_table_remove(plugin.cpu_ht, &cpu->cpu_index);
+    g_assert(success);
+    qemu_rec_mutex_unlock(&plugin.lock);
+}
+
+struct plugin_for_each_args {
+    struct qemu_plugin_ctx *ctx;
+    qemu_plugin_vcpu_simple_cb_t cb;
+};
+
+static void plugin_vcpu_for_each(gpointer k, gpointer v, gpointer udata)
+{
+    struct plugin_for_each_args *args = udata;
+    int cpu_index = *(int *)k;
+
+    args->cb(args->ctx->id, cpu_index);
+}
+
+void qemu_plugin_vcpu_for_each(qemu_plugin_id_t id,
+                               qemu_plugin_vcpu_simple_cb_t cb)
+{
+    struct plugin_for_each_args args;
+
+    if (cb == NULL) {
+        return;
+    }
+    qemu_rec_mutex_lock(&plugin.lock);
+    args.ctx = plugin_id_to_ctx_locked(id);
+    args.cb = cb;
+    g_hash_table_foreach(plugin.cpu_ht, plugin_vcpu_for_each, &args);
+    qemu_rec_mutex_unlock(&plugin.lock);
+}
+
+/* Allocate and return a callback record */
+static struct qemu_plugin_dyn_cb *plugin_get_dyn_cb(GArray **arr)
+{
+    GArray *cbs = *arr;
+
+    if (!cbs) {
+        cbs = g_array_sized_new(false, false,
+                                sizeof(struct qemu_plugin_dyn_cb), 1);
+        *arr = cbs;
+    }
+
+    g_array_set_size(cbs, cbs->len + 1);
+    return &g_array_index(cbs, struct qemu_plugin_dyn_cb, cbs->len - 1);
+}
+
+void plugin_register_inline_op(GArray **arr,
+                               enum qemu_plugin_mem_rw rw,
+                               enum qemu_plugin_op op, void *ptr,
+                               uint64_t imm)
+{
+    struct qemu_plugin_dyn_cb *dyn_cb;
+
+    dyn_cb = plugin_get_dyn_cb(arr);
+    dyn_cb->userp = ptr;
+    dyn_cb->type = PLUGIN_CB_INLINE;
+    dyn_cb->rw = rw;
+    dyn_cb->inline_insn.op = op;
+    dyn_cb->inline_insn.imm = imm;
+}
+
+static inline uint32_t cb_to_tcg_flags(enum qemu_plugin_cb_flags flags)
+{
+    uint32_t ret;
+
+    switch (flags) {
+    case QEMU_PLUGIN_CB_RW_REGS:
+        ret = 0;
+    case QEMU_PLUGIN_CB_R_REGS:
+        ret = TCG_CALL_NO_WG;
+        break;
+    case QEMU_PLUGIN_CB_NO_REGS:
+    default:
+        ret = TCG_CALL_NO_RWG;
+    }
+    return ret;
+}
+
+inline void
+plugin_register_dyn_cb__udata(GArray **arr,
+                              qemu_plugin_vcpu_udata_cb_t cb,
+                              enum qemu_plugin_cb_flags flags, void *udata)
+{
+    struct qemu_plugin_dyn_cb *dyn_cb = plugin_get_dyn_cb(arr);
+
+    dyn_cb->userp = udata;
+    dyn_cb->tcg_flags = cb_to_tcg_flags(flags);
+    dyn_cb->f.vcpu_udata = cb;
+    dyn_cb->type = PLUGIN_CB_REGULAR;
+}
+
+void plugin_register_vcpu_mem_cb(GArray **arr,
+                                 void *cb,
+                                 enum qemu_plugin_cb_flags flags,
+                                 enum qemu_plugin_mem_rw rw,
+                                 void *udata)
+{
+    struct qemu_plugin_dyn_cb *dyn_cb;
+
+    dyn_cb = plugin_get_dyn_cb(arr);
+    dyn_cb->userp = udata;
+    dyn_cb->tcg_flags = cb_to_tcg_flags(flags);
+    dyn_cb->type = PLUGIN_CB_REGULAR;
+    dyn_cb->rw = rw;
+    dyn_cb->f.generic = cb;
+}
+
+void qemu_plugin_tb_trans_cb(CPUState *cpu, struct qemu_plugin_tb *tb)
+{
+    struct qemu_plugin_cb *cb, *next;
+    enum qemu_plugin_event ev = QEMU_PLUGIN_EV_VCPU_TB_TRANS;
+
+    /* no plugin_mask check here; caller should have checked */
+
+    QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
+        qemu_plugin_vcpu_tb_trans_cb_t func = cb->f.vcpu_tb_trans;
+
+        func(cb->ctx->id, tb);
+    }
+}
+
+void
+qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1, uint64_t a2,
+                         uint64_t a3, uint64_t a4, uint64_t a5,
+                         uint64_t a6, uint64_t a7, uint64_t a8)
+{
+    struct qemu_plugin_cb *cb, *next;
+    enum qemu_plugin_event ev = QEMU_PLUGIN_EV_VCPU_SYSCALL;
+
+    if (!test_bit(ev, cpu->plugin_mask)) {
+        return;
+    }
+
+    QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
+        qemu_plugin_vcpu_syscall_cb_t func = cb->f.vcpu_syscall;
+
+        func(cb->ctx->id, cpu->cpu_index, num, a1, a2, a3, a4, a5, a6, a7, a8);
+    }
+}
+
+void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret)
+{
+    struct qemu_plugin_cb *cb, *next;
+    enum qemu_plugin_event ev = QEMU_PLUGIN_EV_VCPU_SYSCALL_RET;
+
+    if (!test_bit(ev, cpu->plugin_mask)) {
+        return;
+    }
+
+    QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
+        qemu_plugin_vcpu_syscall_ret_cb_t func = cb->f.vcpu_syscall_ret;
+
+        func(cb->ctx->id, cpu->cpu_index, num, ret);
+    }
+}
+
+void qemu_plugin_vcpu_idle_cb(CPUState *cpu)
+{
+    plugin_vcpu_cb__simple(cpu, QEMU_PLUGIN_EV_VCPU_IDLE);
+}
+
+void qemu_plugin_vcpu_resume_cb(CPUState *cpu)
+{
+    plugin_vcpu_cb__simple(cpu, QEMU_PLUGIN_EV_VCPU_RESUME);
+}
+
+void qemu_plugin_register_vcpu_idle_cb(qemu_plugin_id_t id,
+                                       qemu_plugin_vcpu_simple_cb_t cb)
+{
+    plugin_register_cb(id, QEMU_PLUGIN_EV_VCPU_IDLE, cb);
+}
+
+void qemu_plugin_register_vcpu_resume_cb(qemu_plugin_id_t id,
+                                         qemu_plugin_vcpu_simple_cb_t cb)
+{
+    plugin_register_cb(id, QEMU_PLUGIN_EV_VCPU_RESUME, cb);
+}
+
+void qemu_plugin_register_flush_cb(qemu_plugin_id_t id,
+                                   qemu_plugin_simple_cb_t cb)
+{
+    plugin_register_cb(id, QEMU_PLUGIN_EV_FLUSH, cb);
+}
+
+static bool free_dyn_cb_arr(void *p, uint32_t h, void *userp)
+{
+    g_array_free((GArray *) p, true);
+    return true;
+}
+
+void qemu_plugin_flush_cb(void)
+{
+    qht_iter_remove(&plugin.dyn_cb_arr_ht, free_dyn_cb_arr, NULL);
+    qht_reset(&plugin.dyn_cb_arr_ht);
+
+    plugin_cb__simple(QEMU_PLUGIN_EV_FLUSH);
+}
+
+void exec_inline_op(struct qemu_plugin_dyn_cb *cb)
+{
+    uint64_t *val = cb->userp;
+
+    switch (cb->inline_insn.op) {
+    case QEMU_PLUGIN_INLINE_ADD_U64:
+        *val += cb->inline_insn.imm;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, uint32_t info)
+{
+    GArray *arr = cpu->plugin_mem_cbs;
+    size_t i;
+
+    if (arr == NULL) {
+        return;
+    }
+    for (i = 0; i < arr->len; i++) {
+        struct qemu_plugin_dyn_cb *cb =
+            &g_array_index(arr, struct qemu_plugin_dyn_cb, i);
+        int w = !!(info & TRACE_MEM_ST) + 1;
+
+        if (!(w & cb->rw)) {
+                break;
+        }
+        switch (cb->type) {
+        case PLUGIN_CB_REGULAR:
+            cb->f.vcpu_mem(cpu->cpu_index, info, vaddr, cb->userp);
+            break;
+        case PLUGIN_CB_INLINE:
+            exec_inline_op(cb);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
+}
+
+void qemu_plugin_atexit_cb(void)
+{
+    plugin_cb__udata(QEMU_PLUGIN_EV_ATEXIT);
+}
+
+void qemu_plugin_register_atexit_cb(qemu_plugin_id_t id,
+                                    qemu_plugin_udata_cb_t cb,
+                                    void *udata)
+{
+    plugin_register_cb_udata(id, QEMU_PLUGIN_EV_ATEXIT, cb, udata);
+}
+
+/*
+ * Call this function after longjmp'ing to the main loop. It's possible that the
+ * last instruction of a TB might have used helpers, and therefore the
+ * "disable" instruction will never execute because it ended up as dead code.
+ */
+void qemu_plugin_disable_mem_helpers(CPUState *cpu)
+{
+    cpu->plugin_mem_cbs = NULL;
+}
+
+static bool plugin_dyn_cb_arr_cmp(const void *ap, const void *bp)
+{
+    return ap == bp;
+}
+
+static void __attribute__((__constructor__)) plugin_init(void)
+{
+    int i;
+
+    for (i = 0; i < QEMU_PLUGIN_EV_MAX; i++) {
+        QLIST_INIT(&plugin.cb_lists[i]);
+    }
+    qemu_rec_mutex_init(&plugin.lock);
+    plugin.id_ht = g_hash_table_new(g_int64_hash, g_int64_equal);
+    plugin.cpu_ht = g_hash_table_new(g_int_hash, g_int_equal);
+    QTAILQ_INIT(&plugin.ctxs);
+    qht_init(&plugin.dyn_cb_arr_ht, plugin_dyn_cb_arr_cmp, 16,
+             QHT_MODE_AUTO_RESIZE);
+    atexit(qemu_plugin_atexit_cb);
+}
diff --git a/plugins/loader.c b/plugins/loader.c
new file mode 100644
index 0000000000..ce724ed583
--- /dev/null
+++ b/plugins/loader.c
@@ -0,0 +1,377 @@
+/*
+ * QEMU Plugin Core Loader Code
+ *
+ * This is the code responsible for loading and unloading the plugins.
+ * Aside from the basic housekeeping tasks we also need to ensure any
+ * generated code is flushed when we remove a plugin so we cannot end
+ * up calling and unloaded helper function.
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * Copyright (C) 2019, Linaro
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/config-file.h"
+#include "qapi/error.h"
+#include "qemu/option.h"
+#include "qemu/rcu_queue.h"
+#include "qemu/qht.h"
+#include "qemu/bitmap.h"
+#include "qemu/xxhash.h"
+#include "qemu/plugin.h"
+#include "hw/core/cpu.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#ifndef CONFIG_USER_ONLY
+#include "hw/boards.h"
+#endif
+
+#include "plugin.h"
+
+/*
+ * For convenience we use a bitmap for plugin.mask, but really all we need is a
+ * u32, which is what we store in TranslationBlock.
+ */
+QEMU_BUILD_BUG_ON(QEMU_PLUGIN_EV_MAX > 32);
+
+struct qemu_plugin_desc {
+    char *path;
+    char **argv;
+    QTAILQ_ENTRY(qemu_plugin_desc) entry;
+    int argc;
+};
+
+struct qemu_plugin_parse_arg {
+    QemuPluginList *head;
+    struct qemu_plugin_desc *curr;
+};
+
+QemuOptsList qemu_plugin_opts = {
+    .name = "plugin",
+    .implied_opt_name = "file",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_plugin_opts.head),
+    .desc = {
+        /* do our own parsing to support multiple plugins */
+        { /* end of list */ }
+    },
+};
+
+typedef int (*qemu_plugin_install_func_t)(qemu_plugin_id_t, const qemu_info_t *, int, char **);
+
+extern struct qemu_plugin_state plugin;
+
+void qemu_plugin_add_dyn_cb_arr(GArray *arr)
+{
+    uint32_t hash = qemu_xxhash2((uint64_t)(uintptr_t)arr);
+    bool inserted;
+
+    inserted = qht_insert(&plugin.dyn_cb_arr_ht, arr, hash, NULL);
+    g_assert(inserted);
+}
+
+static struct qemu_plugin_desc *plugin_find_desc(QemuPluginList *head,
+                                                 const char *path)
+{
+    struct qemu_plugin_desc *desc;
+
+    QTAILQ_FOREACH(desc, head, entry) {
+        if (strcmp(desc->path, path) == 0) {
+            return desc;
+        }
+    }
+    return NULL;
+}
+
+static int plugin_add(void *opaque, const char *name, const char *value,
+                      Error **errp)
+{
+    struct qemu_plugin_parse_arg *arg = opaque;
+    struct qemu_plugin_desc *p;
+
+    if (strcmp(name, "file") == 0) {
+        if (strcmp(value, "") == 0) {
+            error_setg(errp, "requires a non-empty argument");
+            return 1;
+        }
+        p = plugin_find_desc(arg->head, value);
+        if (p == NULL) {
+            p = g_new0(struct qemu_plugin_desc, 1);
+            p->path = g_strdup(value);
+            QTAILQ_INSERT_TAIL(arg->head, p, entry);
+        }
+        arg->curr = p;
+    } else if (strcmp(name, "arg") == 0) {
+        if (arg->curr == NULL) {
+            error_setg(errp, "missing earlier '-plugin file=' option");
+            return 1;
+        }
+        p = arg->curr;
+        p->argc++;
+        p->argv = g_realloc_n(p->argv, p->argc, sizeof(char *));
+        p->argv[p->argc - 1] = g_strdup(value);
+    } else {
+        error_setg(errp, "-plugin: unexpected parameter '%s'; ignored", name);
+    }
+    return 0;
+}
+
+void qemu_plugin_opt_parse(const char *optarg, QemuPluginList *head)
+{
+    struct qemu_plugin_parse_arg arg;
+    QemuOpts *opts;
+
+    opts = qemu_opts_parse_noisily(qemu_find_opts("plugin"), optarg, true);
+    if (opts == NULL) {
+        exit(1);
+    }
+    arg.head = head;
+    arg.curr = NULL;
+    qemu_opt_foreach(opts, plugin_add, &arg, &error_fatal);
+    qemu_opts_del(opts);
+}
+
+/*
+ * From: https://en.wikipedia.org/wiki/Xorshift
+ * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
+ * guaranteed to be >= INT_MAX).
+ */
+static uint64_t xorshift64star(uint64_t x)
+{
+    x ^= x >> 12; /* a */
+    x ^= x << 25; /* b */
+    x ^= x >> 27; /* c */
+    return x * UINT64_C(2685821657736338717);
+}
+
+static int plugin_load(struct qemu_plugin_desc *desc, const qemu_info_t *info)
+{
+    qemu_plugin_install_func_t install;
+    struct qemu_plugin_ctx *ctx;
+    gpointer sym;
+    int rc;
+
+    ctx = qemu_memalign(qemu_dcache_linesize, sizeof(*ctx));
+    memset(ctx, 0, sizeof(*ctx));
+    ctx->desc = desc;
+
+    ctx->handle = g_module_open(desc->path, G_MODULE_BIND_LOCAL);
+    if (ctx->handle == NULL) {
+        error_report("%s: %s", __func__, g_module_error());
+        goto err_dlopen;
+    }
+
+    if (!g_module_symbol(ctx->handle, "qemu_plugin_install", &sym)) {
+        error_report("%s: %s", __func__, g_module_error());
+        goto err_symbol;
+    }
+    install = (qemu_plugin_install_func_t) sym;
+    /* symbol was found; it could be NULL though */
+    if (install == NULL) {
+        error_report("%s: %s: qemu_plugin_install is NULL",
+                     __func__, desc->path);
+        goto err_symbol;
+    }
+
+    qemu_rec_mutex_lock(&plugin.lock);
+
+    /* find an unused random id with &ctx as the seed */
+    ctx->id = (uint64_t)(uintptr_t)ctx;
+    for (;;) {
+        void *existing;
+
+        ctx->id = xorshift64star(ctx->id);
+        existing = g_hash_table_lookup(plugin.id_ht, &ctx->id);
+        if (likely(existing == NULL)) {
+            bool success;
+
+            success = g_hash_table_insert(plugin.id_ht, &ctx->id, &ctx->id);
+            g_assert(success);
+            break;
+        }
+    }
+    QTAILQ_INSERT_TAIL(&plugin.ctxs, ctx, entry);
+    ctx->installing = true;
+    rc = install(ctx->id, info, desc->argc, desc->argv);
+    ctx->installing = false;
+    if (rc) {
+        error_report("%s: qemu_plugin_install returned error code %d",
+                     __func__, rc);
+        /*
+         * we cannot rely on the plugin doing its own cleanup, so
+         * call a full uninstall if the plugin did not yet call it.
+         */
+        if (!ctx->uninstalling) {
+            plugin_reset_uninstall(ctx->id, NULL, false);
+        }
+    }
+
+    qemu_rec_mutex_unlock(&plugin.lock);
+    return rc;
+
+ err_symbol:
+ err_dlopen:
+    qemu_vfree(ctx);
+    return 1;
+}
+
+/* call after having removed @desc from the list */
+static void plugin_desc_free(struct qemu_plugin_desc *desc)
+{
+    int i;
+
+    for (i = 0; i < desc->argc; i++) {
+        g_free(desc->argv[i]);
+    }
+    g_free(desc->argv);
+    g_free(desc->path);
+    g_free(desc);
+}
+
+/**
+ * qemu_plugin_load_list - load a list of plugins
+ * @head: head of the list of descriptors of the plugins to be loaded
+ *
+ * Returns 0 if all plugins in the list are installed, !0 otherwise.
+ *
+ * Note: the descriptor of each successfully installed plugin is removed
+ * from the list given by @head.
+ */
+int qemu_plugin_load_list(QemuPluginList *head)
+{
+    struct qemu_plugin_desc *desc, *next;
+    g_autofree qemu_info_t *info = g_new0(qemu_info_t, 1);
+
+    info->target_name = TARGET_NAME;
+#ifndef CONFIG_USER_ONLY
+    MachineState *ms = MACHINE(qdev_get_machine());
+    info->system_emulation = true;
+    info->system.smp_vcpus = ms->smp.cpus;
+    info->system.max_vcpus = ms->smp.max_cpus;
+#else
+    info->system_emulation = false;
+#endif
+
+    QTAILQ_FOREACH_SAFE(desc, head, entry, next) {
+        int err;
+
+        err = plugin_load(desc, info);
+        if (err) {
+            return err;
+        }
+        QTAILQ_REMOVE(head, desc, entry);
+    }
+    return 0;
+}
+
+struct qemu_plugin_reset_data {
+    struct qemu_plugin_ctx *ctx;
+    qemu_plugin_simple_cb_t cb;
+    bool reset;
+};
+
+static void plugin_reset_destroy__locked(struct qemu_plugin_reset_data *data)
+{
+    struct qemu_plugin_ctx *ctx = data->ctx;
+    enum qemu_plugin_event ev;
+    bool success;
+
+    /*
+     * After updating the subscription lists there is no need to wait for an RCU
+     * grace period to elapse, because right now we either are in a "safe async"
+     * work environment (i.e. all vCPUs are asleep), or no vCPUs have yet been
+     * created.
+     */
+    for (ev = 0; ev < QEMU_PLUGIN_EV_MAX; ev++) {
+        plugin_unregister_cb__locked(ctx, ev);
+    }
+
+    if (data->reset) {
+        g_assert(ctx->resetting);
+        if (data->cb) {
+            data->cb(ctx->id);
+        }
+        ctx->resetting = false;
+        g_free(data);
+        return;
+    }
+
+    g_assert(ctx->uninstalling);
+    /* we cannot dlclose if we are going to return to plugin code */
+    if (ctx->installing) {
+        error_report("Calling qemu_plugin_uninstall from the install function "
+                     "is a bug. Instead, return !0 from the install function.");
+        abort();
+    }
+
+    success = g_hash_table_remove(plugin.id_ht, &ctx->id);
+    g_assert(success);
+    QTAILQ_REMOVE(&plugin.ctxs, ctx, entry);
+    if (data->cb) {
+        data->cb(ctx->id);
+    }
+    if (!g_module_close(ctx->handle)) {
+        warn_report("%s: %s", __func__, g_module_error());
+    }
+    plugin_desc_free(ctx->desc);
+    qemu_vfree(ctx);
+    g_free(data);
+}
+
+static void plugin_reset_destroy(struct qemu_plugin_reset_data *data)
+{
+    qemu_rec_mutex_lock(&plugin.lock);
+    plugin_reset_destroy__locked(data);
+    qemu_rec_mutex_lock(&plugin.lock);
+}
+
+static void plugin_flush_destroy(CPUState *cpu, run_on_cpu_data arg)
+{
+    struct qemu_plugin_reset_data *data = arg.host_ptr;
+
+    g_assert(cpu_in_exclusive_context(cpu));
+    tb_flush(cpu);
+    plugin_reset_destroy(data);
+}
+
+void plugin_reset_uninstall(qemu_plugin_id_t id,
+                            qemu_plugin_simple_cb_t cb,
+                            bool reset)
+{
+    struct qemu_plugin_reset_data *data;
+    struct qemu_plugin_ctx *ctx;
+
+    qemu_rec_mutex_lock(&plugin.lock);
+    ctx = plugin_id_to_ctx_locked(id);
+    if (ctx->uninstalling || (reset && ctx->resetting)) {
+        qemu_rec_mutex_unlock(&plugin.lock);
+        return;
+    }
+    ctx->resetting = reset;
+    ctx->uninstalling = !reset;
+    qemu_rec_mutex_unlock(&plugin.lock);
+
+    data = g_new(struct qemu_plugin_reset_data, 1);
+    data->ctx = ctx;
+    data->cb = cb;
+    data->reset = reset;
+    /*
+     * Only flush the code cache if the vCPUs have been created. If so,
+     * current_cpu must be non-NULL.
+     */
+    if (current_cpu) {
+        async_safe_run_on_cpu(current_cpu, plugin_flush_destroy,
+                              RUN_ON_CPU_HOST_PTR(data));
+    } else {
+        /*
+         * If current_cpu isn't set, then we don't have yet any vCPU threads
+         * and we therefore can remove the callbacks synchronously.
+         */
+        plugin_reset_destroy(data);
+    }
+}
diff --git a/plugins/plugin.h b/plugins/plugin.h
new file mode 100644
index 0000000000..5482168d79
--- /dev/null
+++ b/plugins/plugin.h
@@ -0,0 +1,97 @@
+/*
+ * Plugin Shared Internal Functions
+ *
+ * Copyright (C) 2019, Linaro
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _PLUGIN_INTERNAL_H_
+#define _PLUGIN_INTERNAL_H_
+
+#include <gmodule.h>
+
+/* global state */
+struct qemu_plugin_state {
+    QTAILQ_HEAD(, qemu_plugin_ctx) ctxs;
+    QLIST_HEAD(, qemu_plugin_cb) cb_lists[QEMU_PLUGIN_EV_MAX];
+    /*
+     * Use the HT as a hash map by inserting k == v, which saves memory as
+     * documented by GLib. The parent struct is obtained with container_of().
+     */
+    GHashTable *id_ht;
+    /*
+     * Use the HT as a hash map. Note that we could use a list here,
+     * but with the HT we avoid adding a field to CPUState.
+     */
+    GHashTable *cpu_ht;
+    DECLARE_BITMAP(mask, QEMU_PLUGIN_EV_MAX);
+    /*
+     * @lock protects the struct as well as ctx->uninstalling.
+     * The lock must be acquired by all API ops.
+     * The lock is recursive, which greatly simplifies things, e.g.
+     * callback registration from qemu_plugin_vcpu_for_each().
+     */
+    QemuRecMutex lock;
+    /*
+     * HT of callbacks invoked from helpers. All entries are freed when
+     * the code cache is flushed.
+     */
+    struct qht dyn_cb_arr_ht;
+};
+
+
+struct qemu_plugin_ctx {
+    GModule *handle;
+    qemu_plugin_id_t id;
+    struct qemu_plugin_cb *callbacks[QEMU_PLUGIN_EV_MAX];
+    QTAILQ_ENTRY(qemu_plugin_ctx) entry;
+    /*
+     * keep a reference to @desc until uninstall, so that plugins do not have
+     * to strdup plugin args.
+     */
+    struct qemu_plugin_desc *desc;
+    bool installing;
+    bool uninstalling;
+    bool resetting;
+};
+
+struct qemu_plugin_ctx *plugin_id_to_ctx_locked(qemu_plugin_id_t id);
+
+void plugin_register_inline_op(GArray **arr,
+                               enum qemu_plugin_mem_rw rw,
+                               enum qemu_plugin_op op, void *ptr,
+                               uint64_t imm);
+
+void plugin_reset_uninstall(qemu_plugin_id_t id,
+                            qemu_plugin_simple_cb_t cb,
+                            bool reset);
+
+void plugin_register_cb(qemu_plugin_id_t id, enum qemu_plugin_event ev,
+                        void *func);
+
+void plugin_unregister_cb__locked(struct qemu_plugin_ctx *ctx,
+                                  enum qemu_plugin_event ev);
+
+void
+plugin_register_cb_udata(qemu_plugin_id_t id, enum qemu_plugin_event ev,
+                         void *func, void *udata);
+
+void
+plugin_register_dyn_cb__udata(GArray **arr,
+                              qemu_plugin_vcpu_udata_cb_t cb,
+                              enum qemu_plugin_cb_flags flags, void *udata);
+
+
+void plugin_register_vcpu_mem_cb(GArray **arr,
+                                 void *cb,
+                                 enum qemu_plugin_cb_flags flags,
+                                 enum qemu_plugin_mem_rw rw,
+                                 void *udata);
+
+void exec_inline_op(struct qemu_plugin_dyn_cb *cb);
+
+#endif /* _PLUGIN_INTERNAL_H_ */
diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
new file mode 100644
index 0000000000..4bdb381f48
--- /dev/null
+++ b/plugins/qemu-plugins.symbols
@@ -0,0 +1,40 @@
+{
+  qemu_plugin_uninstall;
+  qemu_plugin_reset;
+  qemu_plugin_register_vcpu_init_cb;
+  qemu_plugin_register_vcpu_exit_cb;
+  qemu_plugin_register_vcpu_idle_cb;
+  qemu_plugin_register_vcpu_resume_cb;
+  qemu_plugin_register_vcpu_insn_exec_cb;
+  qemu_plugin_register_vcpu_insn_exec_inline;
+  qemu_plugin_register_vcpu_mem_cb;
+  qemu_plugin_register_vcpu_mem_haddr_cb;
+  qemu_plugin_register_vcpu_mem_inline;
+  qemu_plugin_ram_addr_from_host;
+  qemu_plugin_register_vcpu_tb_trans_cb;
+  qemu_plugin_register_vcpu_tb_exec_cb;
+  qemu_plugin_register_vcpu_tb_exec_inline;
+  qemu_plugin_register_flush_cb;
+  qemu_plugin_register_vcpu_syscall_cb;
+  qemu_plugin_register_vcpu_syscall_ret_cb;
+  qemu_plugin_register_atexit_cb;
+  qemu_plugin_tb_n_insns;
+  qemu_plugin_tb_get_insn;
+  qemu_plugin_tb_vaddr;
+  qemu_plugin_insn_data;
+  qemu_plugin_insn_size;
+  qemu_plugin_insn_vaddr;
+  qemu_plugin_insn_haddr;
+  qemu_plugin_insn_disas;
+  qemu_plugin_mem_size_shift;
+  qemu_plugin_mem_is_sign_extended;
+  qemu_plugin_mem_is_big_endian;
+  qemu_plugin_mem_is_store;
+  qemu_plugin_get_hwaddr;
+  qemu_plugin_hwaddr_is_io;
+  qemu_plugin_hwaddr_to_raddr;
+  qemu_plugin_vcpu_for_each;
+  qemu_plugin_n_vcpus;
+  qemu_plugin_n_max_vcpus;
+  qemu_plugin_outs;
+};
diff --git a/qemu-options.hx b/qemu-options.hx
index 4feb490338..1fc2470e2f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4201,6 +4201,23 @@ HXCOMM HX does not support conditional compilation of text.
 @findex -trace
 @include qemu-option-trace.texi
 ETEXI
+DEF("plugin", HAS_ARG, QEMU_OPTION_plugin,
+    "-plugin [file=]<file>[,arg=<string>]\n"
+    "                load a plugin\n",
+    QEMU_ARCH_ALL)
+STEXI
+@item -plugin file=@var{file}[,arg=@var{string}]
+@findex -plugin
+
+Load a plugin.
+
+@table @option
+@item file=@var{file}
+Load the given plugin from a shared library file.
+@item arg=@var{string}
+Argument string passed to the plugin. (Can be given multiple times.)
+@end table
+ETEXI
 
 HXCOMM Internal use
 DEF("qtest", HAS_ARG, QEMU_OPTION_qtest, "", QEMU_ARCH_ALL)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index ab68a16fd2..3aef6e3dfe 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1639,7 +1639,7 @@ sub process {
 # Block comment styles
 
 		# Block comments use /* on a line of its own
-		if ($rawline !~ m@^\+.*/\*.*\*/[ \t]*$@ &&	#inline /*...*/
+		if ($rawline !~ m@^\+.*/\*.*\*/[ \t)}]*$@ &&	#inline /*...*/
 		    $rawline =~ m@^\+.*/\*\*?+[ \t]*[^ \t]@) { # /* or /** non-blank
 			WARN("Block comments use a leading /* on a separate line\n" . $herecurr);
 		}
diff --git a/scripts/tracetool/transform.py b/scripts/tracetool/transform.py
index e18b05315e..2ca9286046 100644
--- a/scripts/tracetool/transform.py
+++ b/scripts/tracetool/transform.py
@@ -83,6 +83,7 @@ TCG_2_HOST = {
 
 HOST_2_TCG_COMPAT = {
     "uint8_t": "uint32_t",
+    "uint16_t": "uint32_t",
     }
 
 
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index a69f58bf65..f7f1ed0f41 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -2987,7 +2987,7 @@ static void alpha_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
     CPUAlphaState *env = cpu->env_ptr;
-    uint32_t insn = cpu_ldl_code(env, ctx->base.pc_next);
+    uint32_t insn = translator_ldl(env, ctx->base.pc_next);
 
     ctx->base.pc_next += 4;
     ctx->base.is_jmp = translate_one(ctx, insn);
diff --git a/target/arm/arm_ldst.h b/target/arm/arm_ldst.h
index 5e0ac8bef0..45edb108f6 100644
--- a/target/arm/arm_ldst.h
+++ b/target/arm/arm_ldst.h
@@ -20,25 +20,20 @@
 #ifndef ARM_LDST_H
 #define ARM_LDST_H
 
-#include "exec/cpu_ldst.h"
+#include "exec/translator.h"
 #include "qemu/bswap.h"
 
 /* Load an instruction and return it in the standard little-endian order */
 static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
                                     bool sctlr_b)
 {
-    uint32_t insn = cpu_ldl_code(env, addr);
-    if (bswap_code(sctlr_b)) {
-        return bswap32(insn);
-    }
-    return insn;
+    return translator_ldl_swap(env, addr, bswap_code(sctlr_b));
 }
 
 /* Ditto, for a halfword (Thumb) instruction */
 static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
                                      bool sctlr_b)
 {
-    uint16_t insn;
 #ifndef CONFIG_USER_ONLY
     /* In big-endian (BE32) mode, adjacent Thumb instructions have been swapped
        within each word.  Undo that now.  */
@@ -46,11 +41,7 @@ static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
         addr ^= 2;
     }
 #endif
-    insn = cpu_lduw_code(env, addr);
-    if (bswap_code(sctlr_b)) {
-        return bswap16(insn);
-    }
-    return insn;
+    return translator_lduw_swap(env, addr, bswap_code(sctlr_b));
 }
 
 #endif
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index c1b2822f60..2f8d407a82 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -4221,7 +4221,7 @@ static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     {
         /* Always fetch the insn, even if nullified, so that we check
            the page permissions for execute.  */
-        uint32_t insn = cpu_ldl_code(env, ctx->base.pc_next);
+        uint32_t insn = translator_ldl(env, ctx->base.pc_next);
 
         /* Set up the IA queue for the next insn.
            This will be overwritten by a branch.  */
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 868b0acafe..77e932d827 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -1925,28 +1925,28 @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
 
 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
 {
-    return cpu_ldub_code(env, advance_pc(env, s, 1));
+    return translator_ldub(env, advance_pc(env, s, 1));
 }
 
 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
 {
-    return cpu_ldsw_code(env, advance_pc(env, s, 2));
+    return translator_ldsw(env, advance_pc(env, s, 2));
 }
 
 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
 {
-    return cpu_lduw_code(env, advance_pc(env, s, 2));
+    return translator_lduw(env, advance_pc(env, s, 2));
 }
 
 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
 {
-    return cpu_ldl_code(env, advance_pc(env, s, 4));
+    return translator_ldl(env, advance_pc(env, s, 4));
 }
 
 #ifdef TARGET_X86_64
 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
 {
-    return cpu_ldq_code(env, advance_pc(env, s, 8));
+    return translator_ldq(env, advance_pc(env, s, 8));
 }
 #endif
 
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 24c1dd3408..fcdb7bc8e4 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -384,7 +384,7 @@ static TCGv gen_ldst(DisasContext *s, int opsize, TCGv addr, TCGv val,
 static inline uint16_t read_im16(CPUM68KState *env, DisasContext *s)
 {
     uint16_t im;
-    im = cpu_lduw_code(env, s->pc);
+    im = translator_lduw(env, s->pc);
     s->pc += 2;
     return im;
 }
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index 6addbac8d6..8dd28d6cf1 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -1645,7 +1645,7 @@ static void openrisc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *dc = container_of(dcbase, DisasContext, base);
     OpenRISCCPU *cpu = OPENRISC_CPU(cs);
-    uint32_t insn = cpu_ldl_code(&cpu->env, dc->base.pc_next);
+    uint32_t insn = translator_ldl(&cpu->env, dc->base.pc_next);
 
     if (!decode(dc, insn)) {
         gen_illegal_exception(dc);
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index adb8fd516f..f5fe5d0611 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -7853,11 +7853,9 @@ static void ppc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     LOG_DISAS("nip=" TARGET_FMT_lx " super=%d ir=%d\n",
               ctx->base.pc_next, ctx->mem_idx, (int)msr_ir);
 
-    if (unlikely(need_byteswap(ctx))) {
-        ctx->opcode = bswap32(cpu_ldl_code(env, ctx->base.pc_next));
-    } else {
-        ctx->opcode = cpu_ldl_code(env, ctx->base.pc_next);
-    }
+    ctx->opcode = translator_ldl_swap(env, ctx->base.pc_next,
+                                      need_byteswap(ctx));
+
     LOG_DISAS("translate opcode %08x (%02x %02x %02x %02x) (%s)\n",
               ctx->opcode, opc1(ctx->opcode), opc2(ctx->opcode),
               opc3(ctx->opcode), opc4(ctx->opcode),
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index adeddb85f6..b26533d4fd 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -779,7 +779,7 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
     CPURISCVState *env = cpu->env_ptr;
 
-    ctx->opcode = cpu_ldl_code(env, ctx->base.pc_next);
+    ctx->opcode = translator_ldl(env, ctx->base.pc_next);
     decode_opc(ctx);
     ctx->base.pc_next = ctx->pc_succ_insn;
 
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 5a7d8c4535..922785e225 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1917,7 +1917,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
 
     /* Read all of the insns for the region.  */
     for (i = 0; i < max_insns; ++i) {
-        insns[i] = cpu_lduw_code(env, pc + i * 2);
+        insns[i] = translator_lduw(env, pc + i * 2);
     }
 
     ld_adr = ld_dst = ld_mop = -1;
@@ -2332,7 +2332,7 @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     }
 #endif
 
-    ctx->opcode = cpu_lduw_code(env, ctx->base.pc_next);
+    ctx->opcode = translator_lduw(env, ctx->base.pc_next);
     decode_opc(ctx);
     ctx->base.pc_next += 2;
 }
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index c68bf4a2e4..edc23a7c40 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -5884,7 +5884,7 @@ static void sparc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     CPUSPARCState *env = cs->env_ptr;
     unsigned int insn;
 
-    insn = cpu_ldl_code(env, dc->pc);
+    insn = translator_ldl(env, dc->pc);
     dc->base.pc_next += 4;
     disas_sparc_insn(dc, insn);
 
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index d20e60ce77..a99f5296e2 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -859,7 +859,7 @@ static int arg_copy_compare(const void *a, const void *b)
 static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
 {
     xtensa_isa isa = dc->config->isa;
-    unsigned char b[MAX_INSN_LENGTH] = {cpu_ldub_code(env, dc->pc)};
+    unsigned char b[MAX_INSN_LENGTH] = {translator_ldub(env, dc->pc)};
     unsigned len = xtensa_op0_insn_len(dc, b[0]);
     xtensa_format fmt;
     int slot, slots;
@@ -883,7 +883,7 @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
 
     dc->base.pc_next = dc->pc + len;
     for (i = 1; i < len; ++i) {
-        b[i] = cpu_ldub_code(env, dc->pc + i);
+        b[i] = translator_ldub(env, dc->pc + i);
     }
     xtensa_insnbuf_from_chars(isa, dc->insnbuf, b, len);
     fmt = xtensa_format_decode(isa, dc->insnbuf);
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index e87c327fbf..c245126f98 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -30,6 +30,7 @@
 #include "tcg-mo.h"
 #include "trace-tcg.h"
 #include "trace/mem.h"
+#include "exec/plugin-gen.h"
 
 /* Reduce the number of ifdefs below.  This assumes that all uses of
    TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
@@ -2684,6 +2685,7 @@ void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx)
         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
     }
 
+    plugin_gen_disable_mem_helpers();
     tcg_gen_op1i(INDEX_op_exit_tb, val);
 }
 
@@ -2696,6 +2698,7 @@ void tcg_gen_goto_tb(unsigned idx)
     tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
 #endif
+    plugin_gen_disable_mem_helpers();
     /* When not chaining, we simply fall through to the "fallback" exit.  */
     if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
         tcg_gen_op1i(INDEX_op_goto_tb, idx);
@@ -2705,7 +2708,10 @@ void tcg_gen_goto_tb(unsigned idx)
 void tcg_gen_lookup_and_goto_ptr(void)
 {
     if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
-        TCGv_ptr ptr = tcg_temp_new_ptr();
+        TCGv_ptr ptr;
+
+        plugin_gen_disable_mem_helpers();
+        ptr = tcg_temp_new_ptr();
         gen_helper_lookup_tb_ptr(ptr, cpu_env);
         tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
         tcg_temp_free_ptr(ptr);
@@ -2788,14 +2794,24 @@ static void tcg_gen_req_mo(TCGBar type)
     }
 }
 
+static inline void plugin_gen_mem_callbacks(TCGv vaddr, uint16_t info)
+{
+#ifdef CONFIG_PLUGIN
+    if (tcg_ctx->plugin_insn == NULL) {
+        return;
+    }
+    plugin_gen_empty_mem_callback(vaddr, info);
+#endif
+}
+
 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
 {
     MemOp orig_memop;
+    uint16_t info = trace_mem_get_info(memop, idx, 0);
 
     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     memop = tcg_canonicalize_memop(memop, 0, 0);
-    trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
-                               addr, trace_mem_get_info(memop, 0));
+    trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
 
     orig_memop = memop;
     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
@@ -2807,6 +2823,7 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
     }
 
     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
+    plugin_gen_mem_callbacks(addr, info);
 
     if ((orig_memop ^ memop) & MO_BSWAP) {
         switch (orig_memop & MO_SIZE) {
@@ -2828,11 +2845,11 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
 {
     TCGv_i32 swap = NULL;
+    uint16_t info = trace_mem_get_info(memop, idx, 1);
 
     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     memop = tcg_canonicalize_memop(memop, 0, 1);
-    trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
-                               addr, trace_mem_get_info(memop, 1));
+    trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
 
     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
         swap = tcg_temp_new_i32();
@@ -2852,6 +2869,7 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
     }
 
     gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
+    plugin_gen_mem_callbacks(addr, info);
 
     if (swap) {
         tcg_temp_free_i32(swap);
@@ -2861,6 +2879,7 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
 {
     MemOp orig_memop;
+    uint16_t info;
 
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
@@ -2874,8 +2893,8 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
 
     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     memop = tcg_canonicalize_memop(memop, 1, 0);
-    trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
-                               addr, trace_mem_get_info(memop, 0));
+    info = trace_mem_get_info(memop, idx, 0);
+    trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
 
     orig_memop = memop;
     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
@@ -2887,6 +2906,7 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
     }
 
     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
+    plugin_gen_mem_callbacks(addr, info);
 
     if ((orig_memop ^ memop) & MO_BSWAP) {
         switch (orig_memop & MO_SIZE) {
@@ -2914,6 +2934,7 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
 {
     TCGv_i64 swap = NULL;
+    uint16_t info;
 
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
@@ -2922,8 +2943,8 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
 
     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     memop = tcg_canonicalize_memop(memop, 1, 1);
-    trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
-                               addr, trace_mem_get_info(memop, 1));
+    info = trace_mem_get_info(memop, idx, 1);
+    trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
 
     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
         swap = tcg_temp_new_i64();
@@ -2947,6 +2968,7 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
     }
 
     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
+    plugin_gen_mem_callbacks(addr, info);
 
     if (swap) {
         tcg_temp_free_i64(swap);
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index e9cf172762..4af272daa5 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -833,6 +833,17 @@ void tcg_gen_goto_tb(unsigned idx);
  */
 void tcg_gen_lookup_and_goto_ptr(void);
 
+static inline void tcg_gen_plugin_cb_start(unsigned from, unsigned type,
+                                           unsigned wr)
+{
+    tcg_gen_op3(INDEX_op_plugin_cb_start, from, type, wr);
+}
+
+static inline void tcg_gen_plugin_cb_end(void)
+{
+    tcg_emit_op(INDEX_op_plugin_cb_end);
+}
+
 #if TARGET_LONG_BITS == 32
 #define tcg_temp_new() tcg_temp_new_i32()
 #define tcg_global_reg_new tcg_global_reg_new_i32
@@ -1249,6 +1260,11 @@ static inline void tcg_gen_ld_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t o)
     glue(tcg_gen_ld_,PTR)((NAT)r, a, o);
 }
 
+static inline void tcg_gen_st_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t o)
+{
+    glue(tcg_gen_st_, PTR)((NAT)r, a, o);
+}
+
 static inline void tcg_gen_discard_ptr(TCGv_ptr a)
 {
     glue(tcg_gen_discard_,PTR)((NAT)a);
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 242d608e6d..9288a04946 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -198,6 +198,9 @@ DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
 DEF(goto_ptr, 0, 1, 0,
     TCG_OPF_BB_EXIT | TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr))
 
+DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT)
+DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT)
+
 DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 16b2d0e0ec..5475d49ed1 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -736,6 +736,15 @@ void tcg_region_init(void)
 #endif
 }
 
+static void alloc_tcg_plugin_context(TCGContext *s)
+{
+#ifdef CONFIG_PLUGIN
+    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
+    s->plugin_tb->insns =
+        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
+#endif
+}
+
 /*
  * All TCG threads except the parent (i.e. the one that called tcg_context_init
  * and registered the target's TCG globals) must register with this function
@@ -780,6 +789,10 @@ void tcg_register_thread(void)
     g_assert(n < ms->smp.max_cpus);
     atomic_set(&tcg_ctxs[n], s);
 
+    if (n > 0) {
+        alloc_tcg_plugin_context(s);
+    }
+
     tcg_ctx = s;
     qemu_mutex_lock(&region.lock);
     err = tcg_region_initial_alloc__locked(tcg_ctx);
@@ -976,6 +989,8 @@ void tcg_context_init(TCGContext *s)
         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
     }
 
+    alloc_tcg_plugin_context(s);
+
     tcg_ctx = s;
     /*
      * In user-mode we simply share the init context among threads, since we
@@ -1681,6 +1696,13 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
     flags = info->flags;
     sizemask = info->sizemask;
 
+#ifdef CONFIG_PLUGIN
+    /* detect non-plugin helpers */
+    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
+        tcg_ctx->plugin_insn->calls_helpers = true;
+    }
+#endif
+
 #if defined(__sparc__) && !defined(__arch64__) \
     && !defined(CONFIG_TCG_INTERPRETER)
     /* We have 64-bit values in one register, but need to pass as two
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 2792f65d04..92ca10dffc 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -29,6 +29,7 @@
 #include "exec/memop.h"
 #include "exec/tb-context.h"
 #include "qemu/bitops.h"
+#include "qemu/plugin.h"
 #include "qemu/queue.h"
 #include "tcg-mo.h"
 #include "tcg-target.h"
@@ -538,6 +539,9 @@ typedef struct TCGOp {
 
     /* Next and previous opcodes.  */
     QTAILQ_ENTRY(TCGOp) link;
+#ifdef CONFIG_PLUGIN
+    QSIMPLEQ_ENTRY(TCGOp) plugin_link;
+#endif
 
     /* Arguments for the opcode.  */
     TCGArg args[MAX_OPC_PARAM];
@@ -639,6 +643,23 @@ struct TCGContext {
 
     TCGLabel *exitreq_label;
 
+#ifdef CONFIG_PLUGIN
+    /*
+     * We keep one plugin_tb struct per TCGContext. Note that on every TB
+     * translation we clear but do not free its contents; this way we
+     * avoid a lot of malloc/free churn, since after a few TB's it's
+     * unlikely that we'll need to allocate either more instructions or more
+     * space for instructions (for variable-instruction-length ISAs).
+     */
+    struct qemu_plugin_tb *plugin_tb;
+
+    /* descriptor of the instruction being translated */
+    struct qemu_plugin_insn *plugin_insn;
+
+    /* list to quickly access the injected ops */
+    QSIMPLEQ_HEAD(, TCGOp) plugin_ops;
+#endif
+
     TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
     TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
 
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 34ec03391c..c79402ab75 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -1066,6 +1066,15 @@ check-softfloat:
 		"SKIPPED for non-TCG builds")
 endif
 
+# Plugins
+ifeq ($(CONFIG_PLUGIN),y)
+.PHONY: plugins
+plugins:
+	$(call quiet-command,\
+		$(MAKE) $(SUBDIR_MAKEFLAGS) -C tests/plugin V="$(V)", \
+		"BUILD", "plugins")
+endif
+
 # Per guest TCG tests
 
 BUILD_TCG_TARGET_RULES=$(patsubst %,build-tcg-tests-%, $(TARGET_DIRS))
@@ -1076,7 +1085,7 @@ RUN_TCG_TARGET_RULES=$(patsubst %,run-tcg-tests-%, $(TARGET_DIRS))
 $(foreach PROBE_TARGET,$(TARGET_DIRS), 				\
 	$(eval -include $(SRC_PATH)/tests/tcg/Makefile.prereqs))
 
-build-tcg-tests-%:
+build-tcg-tests-%: $(if $(CONFIG_PLUGIN),plugins)
 	$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) \
 		-f $(SRC_PATH)/tests/tcg/Makefile.qemu \
 		SRC_PATH=$(SRC_PATH) \
diff --git a/tests/plugin/Makefile b/tests/plugin/Makefile
new file mode 100644
index 0000000000..75467b6db8
--- /dev/null
+++ b/tests/plugin/Makefile
@@ -0,0 +1,31 @@
+BUILD_DIR := $(CURDIR)/../..
+
+include $(BUILD_DIR)/config-host.mak
+include $(SRC_PATH)/rules.mak
+
+$(call set-vpath, $(SRC_PATH)/tests/plugin)
+
+NAMES :=
+NAMES += bb
+NAMES += empty
+NAMES += insn
+NAMES += mem
+NAMES += hotblocks
+NAMES += howvec
+NAMES += hotpages
+
+SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
+
+QEMU_CFLAGS += -fPIC
+QEMU_CFLAGS += -I$(SRC_PATH)/include/qemu
+
+all: $(SONAMES)
+
+lib%.so: %.o
+	$(CC) -shared -Wl,-soname,$@ -o $@ $^ $(LDLIBS)
+
+clean:
+	rm -f *.o *.so *.d
+	rm -Rf .libs
+
+.PHONY: all clean
diff --git a/tests/plugin/bb.c b/tests/plugin/bb.c
new file mode 100644
index 0000000000..45e1de5bd6
--- /dev/null
+++ b/tests/plugin/bb.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+static uint64_t bb_count;
+static uint64_t insn_count;
+static bool do_inline;
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+    g_autofree gchar *out;
+    out = g_strdup_printf("bb's: %" PRIu64", insns: %" PRIu64 "\n",
+                          bb_count, insn_count);
+    qemu_plugin_outs(out);
+}
+
+static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
+{
+    unsigned long n_insns = (unsigned long)udata;
+
+    insn_count += n_insns;
+    bb_count++;
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    unsigned long n_insns = qemu_plugin_tb_n_insns(tb);
+
+    if (do_inline) {
+        qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64,
+                                                 &bb_count, 1);
+        qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64,
+                                                 &insn_count, n_insns);
+    } else {
+        qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
+                                             QEMU_PLUGIN_CB_NO_REGS,
+                                             (void *)n_insns);
+    }
+}
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info,
+                                           int argc, char **argv)
+{
+    if (argc && strcmp(argv[0], "inline") == 0) {
+        do_inline = true;
+    }
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+    return 0;
+}
diff --git a/tests/plugin/empty.c b/tests/plugin/empty.c
new file mode 100644
index 0000000000..3f60f69027
--- /dev/null
+++ b/tests/plugin/empty.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <qemu-plugin.h>
+
+/*
+ * Empty TB translation callback.
+ * This allows us to measure the overhead of injecting and then
+ * removing empty instrumentation.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{ }
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info,
+                                           int argc, char **argv)
+{
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    return 0;
+}
diff --git a/tests/plugin/hotblocks.c b/tests/plugin/hotblocks.c
new file mode 100644
index 0000000000..1bd183849a
--- /dev/null
+++ b/tests/plugin/hotblocks.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+static bool do_inline;
+
+/* Plugins need to take care of their own locking */
+static GMutex lock;
+static GHashTable *hotblocks;
+static guint64 limit = 20;
+
+/*
+ * Counting Structure
+ *
+ * The internals of the TCG are not exposed to plugins so we can only
+ * get the starting PC for each block. We cheat this slightly by
+ * xor'ing the number of instructions to the hash to help
+ * differentiate.
+ */
+typedef struct {
+    uint64_t start_addr;
+    uint64_t exec_count;
+    int      trans_count;
+    unsigned long insns;
+} ExecCount;
+
+static gint cmp_exec_count(gconstpointer a, gconstpointer b)
+{
+    ExecCount *ea = (ExecCount *) a;
+    ExecCount *eb = (ExecCount *) b;
+    return ea->exec_count > eb->exec_count ? -1 : 1;
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+    g_autoptr(GString) report = g_string_new("collected ");
+    GList *counts, *it;
+    int i;
+
+    g_mutex_lock(&lock);
+    g_string_append_printf(report, "%d entries in the hash table\n",
+                           g_hash_table_size(hotblocks));
+    counts = g_hash_table_get_values(hotblocks);
+    it = g_list_sort(counts, cmp_exec_count);
+
+    if (it) {
+        g_string_append_printf(report, "pc, tcount, icount, ecount\n");
+
+        for (i = 0; i < limit && it->next; i++, it = it->next) {
+            ExecCount *rec = (ExecCount *) it->data;
+            g_string_append_printf(report, "%#016"PRIx64", %d, %ld, %"PRId64"\n",
+                                   rec->start_addr, rec->trans_count,
+                                   rec->insns, rec->exec_count);
+        }
+
+        g_list_free(it);
+        g_mutex_unlock(&lock);
+    }
+
+    qemu_plugin_outs(report->str);
+}
+
+static void plugin_init(void)
+{
+    hotblocks = g_hash_table_new(NULL, g_direct_equal);
+}
+
+static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
+{
+    ExecCount *cnt;
+    uint64_t hash = (uint64_t) udata;
+
+    g_mutex_lock(&lock);
+    cnt = (ExecCount *) g_hash_table_lookup(hotblocks, (gconstpointer) hash);
+    /* should always succeed */
+    g_assert(cnt);
+    cnt->exec_count++;
+    g_mutex_unlock(&lock);
+}
+
+/*
+ * When do_inline we ask the plugin to increment the counter for us.
+ * Otherwise a helper is inserted which calls the vcpu_tb_exec
+ * callback.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    ExecCount *cnt;
+    uint64_t pc = qemu_plugin_tb_vaddr(tb);
+    unsigned long insns = qemu_plugin_tb_n_insns(tb);
+    uint64_t hash = pc ^ insns;
+
+    g_mutex_lock(&lock);
+    cnt = (ExecCount *) g_hash_table_lookup(hotblocks, (gconstpointer) hash);
+    if (cnt) {
+        cnt->trans_count++;
+    } else {
+        cnt = g_new0(ExecCount, 1);
+        cnt->start_addr = pc;
+        cnt->trans_count = 1;
+        cnt->insns = insns;
+        g_hash_table_insert(hotblocks, (gpointer) hash, (gpointer) cnt);
+    }
+
+    g_mutex_unlock(&lock);
+
+    if (do_inline) {
+        qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64,
+                                                 &cnt->exec_count, 1);
+    } else {
+        qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
+                                             QEMU_PLUGIN_CB_NO_REGS,
+                                             (void *)hash);
+    }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+                        int argc, char **argv)
+{
+    if (argc && strcmp(argv[0], "inline") == 0) {
+        do_inline = true;
+    }
+
+    plugin_init();
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+    return 0;
+}
diff --git a/tests/plugin/hotpages.c b/tests/plugin/hotpages.c
new file mode 100644
index 0000000000..77df07a3cc
--- /dev/null
+++ b/tests/plugin/hotpages.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * Hot Pages - show which pages saw the most memory accesses.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+static uint64_t page_size = 4096;
+static uint64_t page_mask;
+static int limit = 50;
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+static bool track_io;
+
+enum sort_type {
+    SORT_RW = 0,
+    SORT_R,
+    SORT_W,
+    SORT_A
+};
+
+static int sort_by = SORT_RW;
+
+typedef struct {
+    uint64_t page_address;
+    int cpu_read;
+    int cpu_write;
+    uint64_t reads;
+    uint64_t writes;
+} PageCounters;
+
+static GMutex lock;
+static GHashTable *pages;
+
+static gint cmp_access_count(gconstpointer a, gconstpointer b)
+{
+    PageCounters *ea = (PageCounters *) a;
+    PageCounters *eb = (PageCounters *) b;
+    int r;
+    switch (sort_by) {
+    case SORT_RW:
+        r = (ea->reads + ea->writes) > (eb->reads + eb->writes) ? -1 : 1;
+        break;
+    case SORT_R:
+        r = ea->reads > eb->reads ? -1 : 1;
+        break;
+    case SORT_W:
+        r = ea->writes > eb->writes ? -1 : 1;
+        break;
+    case SORT_A:
+        r = ea->page_address > eb->page_address ? -1 : 1;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return r;
+}
+
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+    g_autoptr(GString) report = g_string_new("Addr, RCPUs, Reads, WCPUs, Writes\n");
+    int i;
+    GList *counts;
+
+    counts = g_hash_table_get_values(pages);
+    if (counts && g_list_next(counts)) {
+        GList *it;
+
+        it = g_list_sort(counts, cmp_access_count);
+
+        for (i = 0; i < limit && it->next; i++, it = it->next) {
+            PageCounters *rec = (PageCounters *) it->data;
+            g_string_append_printf(report,
+                                   "%#016"PRIx64", 0x%04x, %"PRId64
+                                   ", 0x%04x, %"PRId64"\n",
+                                   rec->page_address,
+                                   rec->cpu_read, rec->reads,
+                                   rec->cpu_write, rec->writes);
+        }
+        g_list_free(it);
+    }
+
+    qemu_plugin_outs(report->str);
+}
+
+static void plugin_init(void)
+{
+    page_mask = (page_size - 1);
+    pages = g_hash_table_new(NULL, g_direct_equal);
+}
+
+static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
+                       uint64_t vaddr, void *udata)
+{
+    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);
+    uint64_t page;
+    PageCounters *count;
+
+    /* We only get a hwaddr for system emulation */
+    if (track_io) {
+        if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
+            page = vaddr;
+        } else {
+            return;
+        }
+    } else {
+        if (hwaddr && !qemu_plugin_hwaddr_is_io(hwaddr)) {
+            page = (uint64_t) qemu_plugin_hwaddr_device_offset(hwaddr);
+        } else {
+            page = vaddr;
+        }
+    }
+    page &= ~page_mask;
+
+    g_mutex_lock(&lock);
+    count = (PageCounters *) g_hash_table_lookup(pages, GUINT_TO_POINTER(page));
+
+    if (!count) {
+        count = g_new0(PageCounters, 1);
+        count->page_address = page;
+        g_hash_table_insert(pages, GUINT_TO_POINTER(page), (gpointer) count);
+    }
+    if (qemu_plugin_mem_is_store(meminfo)) {
+        count->writes++;
+        count->cpu_write |= (1 << cpu_index);
+    } else {
+        count->reads++;
+        count->cpu_read |= (1 << cpu_index);
+    }
+
+    g_mutex_unlock(&lock);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n = qemu_plugin_tb_n_insns(tb);
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,
+                                         QEMU_PLUGIN_CB_NO_REGS,
+                                         rw, NULL);
+    }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+                        int argc, char **argv)
+{
+    int i;
+
+    for (i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        if (g_strcmp0(opt, "reads") == 0) {
+            sort_by = SORT_R;
+        } else if (g_strcmp0(opt, "writes") == 0) {
+            sort_by = SORT_W;
+        } else if (g_strcmp0(opt, "address") == 0) {
+            sort_by = SORT_A;
+        } else if (g_strcmp0(opt, "io") == 0) {
+            track_io = true;
+        } else if (g_str_has_prefix(opt, "pagesize=")) {
+            page_size = g_ascii_strtoull(opt + 9, NULL, 10);
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    plugin_init();
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+    return 0;
+}
diff --git a/tests/plugin/howvec.c b/tests/plugin/howvec.c
new file mode 100644
index 0000000000..58fa675e34
--- /dev/null
+++ b/tests/plugin/howvec.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * How vectorised is this code?
+ *
+ * Attempt to measure the amount of vectorisation that has been done
+ * on some code by counting classes of instruction.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef enum {
+    COUNT_CLASS,
+    COUNT_INDIVIDUAL,
+    COUNT_NONE
+} CountType;
+
+static int limit = 50;
+static bool do_inline;
+static bool verbose;
+
+static GMutex lock;
+static GHashTable *insns;
+
+typedef struct {
+    const char *class;
+    const char *opt;
+    uint32_t mask;
+    uint32_t pattern;
+    CountType what;
+    uint64_t count;
+} InsnClassExecCount;
+
+typedef struct {
+    char *insn;
+    uint32_t opcode;
+    uint64_t count;
+    InsnClassExecCount *class;
+} InsnExecCount;
+
+/*
+ * Matchers for classes of instructions, order is important.
+ *
+ * Your most precise match must be before looser matches. If no match
+ * is found in the table we can create an individual entry.
+ *
+ * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
+ */
+static InsnClassExecCount aarch64_insn_classes[] = {
+    /* "Reserved"" */
+    { "  UDEF",              "udef",   0xffff0000, 0x00000000, COUNT_NONE},
+    { "  SVE",               "sve",    0x1e000000, 0x04000000, COUNT_CLASS},
+    { "Reserved",            "res",    0x1e000000, 0x00000000, COUNT_CLASS},
+    /* Data Processing Immediate */
+    { "  PCrel addr",        "pcrel",  0x1f000000, 0x10000000, COUNT_CLASS},
+    { "  Add/Sub (imm,tags)","asit",   0x1f800000, 0x11800000, COUNT_CLASS},
+    { "  Add/Sub (imm)",     "asi",    0x1f000000, 0x11000000, COUNT_CLASS},
+    { "  Logical (imm)",     "logi",   0x1f800000, 0x12000000, COUNT_CLASS},
+    { "  Move Wide (imm)",   "movwi",  0x1f800000, 0x12800000, COUNT_CLASS},
+    { "  Bitfield",          "bitf",   0x1f800000, 0x13000000, COUNT_CLASS},
+    { "  Extract",           "extr",   0x1f800000, 0x13800000, COUNT_CLASS},
+    { "Data Proc Imm",       "dpri",   0x1c000000, 0x10000000, COUNT_CLASS},
+    /* Branches */
+    { "  Cond Branch (imm)", "cndb",   0xfe000000, 0x54000000, COUNT_CLASS},
+    { "  Exception Gen",     "excp",   0xff000000, 0xd4000000, COUNT_CLASS},
+    { "    NOP",             "nop",    0xffffffff, 0xd503201f, COUNT_NONE},
+    { "  Hints",             "hint",   0xfffff000, 0xd5032000, COUNT_CLASS},
+    { "  Barriers",          "barr",   0xfffff000, 0xd5033000, COUNT_CLASS},
+    { "  PSTATE",            "psta",   0xfff8f000, 0xd5004000, COUNT_CLASS},
+    { "  System Insn",       "sins",   0xffd80000, 0xd5080000, COUNT_CLASS},
+    { "  System Reg",        "sreg",   0xffd00000, 0xd5100000, COUNT_CLASS},
+    { "  Branch (reg)",      "breg",   0xfe000000, 0xd6000000, COUNT_CLASS},
+    { "  Branch (imm)",      "bimm",   0x7c000000, 0x14000000, COUNT_CLASS},
+    { "  Cmp & Branch",      "cmpb",   0x7e000000, 0x34000000, COUNT_CLASS},
+    { "  Tst & Branch",      "tstb",   0x7e000000, 0x36000000, COUNT_CLASS},
+    { "Branches",            "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
+    /* Loads and Stores */
+    { "  AdvSimd ldstmult",  "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
+    { "  AdvSimd ldstmult++","advlsmp",0xbfb00000, 0x0c800000, COUNT_CLASS},
+    { "  AdvSimd ldst",      "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
+    { "  AdvSimd ldst++",    "advlssp",0xbf800000, 0x0d800000, COUNT_CLASS},
+    { "  ldst excl",         "ldstx",  0x3f000000, 0x08000000, COUNT_CLASS},
+    { "    Prefetch",        "prfm",   0xff000000, 0xd8000000, COUNT_CLASS},
+    { "  Load Reg (lit)",    "ldlit",  0x1b000000, 0x18000000, COUNT_CLASS},
+    { "  ldst noalloc pair", "ldstnap",0x3b800000, 0x28000000, COUNT_CLASS},
+    { "  ldst pair",         "ldstp",  0x38000000, 0x28000000, COUNT_CLASS},
+    { "  ldst reg",          "ldstr",  0x3b200000, 0x38000000, COUNT_CLASS},
+    { "  Atomic ldst",       "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
+    { "  ldst reg (reg off)","ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
+    { "  ldst reg (pac)",    "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
+    { "  ldst reg (imm)",    "ldsti",  0x3b000000, 0x39000000, COUNT_CLASS},
+    { "Loads & Stores",      "ldst",   0x0a000000, 0x08000000, COUNT_CLASS},
+    /* Data Processing Register */
+    { "Data Proc Reg",       "dprr",   0x0e000000, 0x0a000000, COUNT_CLASS},
+    /* Scalar FP */
+    { "Scalar FP ",          "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS},
+    /* Unclassified */
+    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_CLASS},
+};
+
+static InsnClassExecCount sparc32_insn_classes[] = {
+    { "Call",                "call",   0xc0000000, 0x40000000, COUNT_CLASS},
+    { "Branch ICond",        "bcc",    0xc1c00000, 0x00800000, COUNT_CLASS},
+    { "Branch Fcond",        "fbcc",   0xc1c00000, 0x01800000, COUNT_CLASS},
+    { "SetHi",               "sethi",  0xc1c00000, 0x01000000, COUNT_CLASS},
+    { "FPU ALU",             "fpu",    0xc1f00000, 0x81a00000, COUNT_CLASS},
+    { "ALU",                 "alu",    0xc0000000, 0x80000000, COUNT_CLASS},
+    { "Load/Store",          "ldst",   0xc0000000, 0xc0000000, COUNT_CLASS},
+    /* Unclassified */
+    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
+};
+
+static InsnClassExecCount sparc64_insn_classes[] = {
+    { "SetHi & Branches",     "op0",   0xc0000000, 0x00000000, COUNT_CLASS},
+    { "Call",                 "op1",   0xc0000000, 0x40000000, COUNT_CLASS},
+    { "Arith/Logical/Move",   "op2",   0xc0000000, 0x80000000, COUNT_CLASS},
+    { "Arith/Logical/Move",   "op3",   0xc0000000, 0xc0000000, COUNT_CLASS},
+    /* Unclassified */
+    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
+};
+
+/* Default matcher for currently unclassified architectures */
+static InsnClassExecCount default_insn_classes[] = {
+    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
+};
+
+typedef struct {
+    const char *qemu_target;
+    InsnClassExecCount *table;
+    int table_sz;
+} ClassSelector;
+
+static ClassSelector class_tables[] =
+{
+    { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
+    { "sparc",   sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
+    { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
+    { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) },
+};
+
+static InsnClassExecCount *class_table;
+static int class_table_sz;
+
+static gint cmp_exec_count(gconstpointer a, gconstpointer b)
+{
+    InsnExecCount *ea = (InsnExecCount *) a;
+    InsnExecCount *eb = (InsnExecCount *) b;
+    return ea->count > eb->count ? -1 : 1;
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+    g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
+    int i;
+    GList *counts;
+    InsnClassExecCount *class = NULL;
+
+    for (i = 0; i < class_table_sz; i++) {
+        class = &class_table[i];
+        switch (class->what) {
+        case COUNT_CLASS:
+            if (class->count || verbose) {
+                g_string_append_printf(report, "Class: %-24s\t(%ld hits)\n",
+                                       class->class,
+                                       class->count);
+            }
+            break;
+        case COUNT_INDIVIDUAL:
+            g_string_append_printf(report, "Class: %-24s\tcounted individually\n",
+                                   class->class);
+            break;
+        case COUNT_NONE:
+            g_string_append_printf(report, "Class: %-24s\tnot counted\n",
+                                   class->class);
+            break;
+        default:
+            break;
+        }
+    }
+
+    counts = g_hash_table_get_values(insns);
+    if (counts && g_list_next(counts)) {
+        GList *it;
+
+        g_string_append_printf(report,"Individual Instructions:\n");
+
+        it = g_list_sort(counts, cmp_exec_count);
+
+        for (i = 0; i < limit && it->next; i++, it = it->next) {
+            InsnExecCount *rec = (InsnExecCount *) it->data;
+            g_string_append_printf(report, "Instr: %-24s\t(%ld hits)\t(op=%#08x/%s)\n",
+                                   rec->insn,
+                                   rec->count,
+                                   rec->opcode,
+                                   rec->class ?
+                                   rec->class->class : "un-categorised");
+        }
+        g_list_free(it);
+    }
+
+    qemu_plugin_outs(report->str);
+}
+
+static void plugin_init(void)
+{
+    insns = g_hash_table_new(NULL, g_direct_equal);
+}
+
+static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
+{
+    uint64_t *count = (uint64_t *) udata;
+    (*count)++;
+}
+
+static uint64_t * find_counter(struct qemu_plugin_insn *insn)
+{
+    int i;
+    uint64_t *cnt = NULL;
+    uint32_t opcode;
+    InsnClassExecCount *class = NULL;
+
+    /*
+     * We only match the first 32 bits of the instruction which is
+     * fine for most RISCs but a bit limiting for CISC architectures.
+     * They would probably benefit from a more tailored plugin.
+     * However we can fall back to individual instruction counting.
+     */
+    opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
+
+    for (i = 0; !cnt && i < class_table_sz; i++) {
+        class = &class_table[i];
+        uint32_t masked_bits = opcode & class->mask;
+        if (masked_bits == class->pattern) {
+            break;
+        }
+    }
+
+    g_assert(class);
+
+    switch (class->what) {
+    case COUNT_NONE:
+        return NULL;
+    case COUNT_CLASS:
+        return &class->count;
+    case COUNT_INDIVIDUAL:
+    {
+        InsnExecCount *icount;
+
+        g_mutex_lock(&lock);
+        icount = (InsnExecCount *) g_hash_table_lookup(insns,
+                                                       GUINT_TO_POINTER(opcode));
+
+        if (!icount) {
+            icount = g_new0(InsnExecCount, 1);
+            icount->opcode = opcode;
+            icount->insn = qemu_plugin_insn_disas(insn);
+            icount->class = class;
+
+            g_hash_table_insert(insns, GUINT_TO_POINTER(opcode),
+                                (gpointer) icount);
+        }
+        g_mutex_unlock(&lock);
+
+        return &icount->count;
+    }
+    default:
+        g_assert_not_reached();
+    }
+
+    return NULL;
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n = qemu_plugin_tb_n_insns(tb);
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        uint64_t *cnt;
+        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+        cnt = find_counter(insn);
+
+        if (cnt) {
+            if (do_inline) {
+                qemu_plugin_register_vcpu_insn_exec_inline(
+                    insn, QEMU_PLUGIN_INLINE_ADD_U64, cnt, 1);
+            } else {
+                qemu_plugin_register_vcpu_insn_exec_cb(
+                    insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt);
+            }
+        }
+    }
+}
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info,
+                                           int argc, char **argv)
+{
+    int i;
+
+    /* Select a class table appropriate to the guest architecture */
+    for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
+        ClassSelector *entry = &class_tables[i];
+        if (!entry->qemu_target ||
+            strcmp(entry->qemu_target, info->target_name) == 0) {
+            class_table = entry->table;
+            class_table_sz = entry->table_sz;
+            break;
+        }
+    }
+
+    for (i = 0; i < argc; i++) {
+        char *p = argv[i];
+        if (strcmp(p, "inline") == 0) {
+            do_inline = true;
+        } else if (strcmp(p, "verbose") == 0) {
+            verbose = true;
+        } else {
+            int j;
+            CountType type = COUNT_INDIVIDUAL;
+            if (*p == '!') {
+                type = COUNT_NONE;
+                p++;
+            }
+            for (j = 0; j < class_table_sz; j++) {
+                if (strcmp(p, class_table[j].opt) == 0) {
+                    class_table[j].what = type;
+                    break;
+                }
+            }
+        }
+    }
+
+    plugin_init();
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+    return 0;
+}
diff --git a/tests/plugin/insn.c b/tests/plugin/insn.c
new file mode 100644
index 0000000000..e5fd07fb64
--- /dev/null
+++ b/tests/plugin/insn.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+static uint64_t insn_count;
+static bool do_inline;
+
+static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
+{
+    insn_count++;
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n = qemu_plugin_tb_n_insns(tb);
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+
+        if (do_inline) {
+            qemu_plugin_register_vcpu_insn_exec_inline(
+                insn, QEMU_PLUGIN_INLINE_ADD_U64, &insn_count, 1);
+        } else {
+            qemu_plugin_register_vcpu_insn_exec_cb(
+                insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, NULL);
+        }
+    }
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+    g_autofree gchar *out;
+    out = g_strdup_printf("insns: %" PRIu64 "\n", insn_count);
+    qemu_plugin_outs(out);
+}
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info,
+                                           int argc, char **argv)
+{
+    if (argc && !strcmp(argv[0], "inline")) {
+        do_inline = true;
+    }
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+    return 0;
+}
diff --git a/tests/plugin/mem.c b/tests/plugin/mem.c
new file mode 100644
index 0000000000..d967388989
--- /dev/null
+++ b/tests/plugin/mem.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+static uint64_t mem_count;
+static uint64_t io_count;
+static bool do_inline;
+static bool do_haddr;
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+    g_autoptr(GString) out = g_string_new("");
+
+    g_string_printf(out, "mem accesses: %" PRIu64 "\n", mem_count);
+    if (do_haddr) {
+        g_string_append_printf(out, "io accesses: %" PRIu64 "\n", mem_count);
+    }
+    qemu_plugin_outs(out->str);
+}
+
+static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
+                     uint64_t vaddr, void *udata)
+{
+    if (do_haddr) {
+        struct qemu_plugin_hwaddr *hwaddr;
+        hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);
+        if (qemu_plugin_hwaddr_is_io(hwaddr)) {
+            io_count++;
+        } else {
+            mem_count++;
+        }
+    } else {
+        mem_count++;
+    }
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n = qemu_plugin_tb_n_insns(tb);
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+
+        if (do_inline) {
+            qemu_plugin_register_vcpu_mem_inline(insn, rw,
+                                                 QEMU_PLUGIN_INLINE_ADD_U64,
+                                                 &mem_count, 1);
+        } else {
+            qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem,
+                                             QEMU_PLUGIN_CB_NO_REGS,
+                                             rw, NULL);
+        }
+    }
+}
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info,
+                                           int argc, char **argv)
+{
+    if (argc) {
+        if (argc >= 3) {
+            if (!strcmp(argv[2], "haddr")) {
+                do_haddr = true;
+            }
+        }
+        if (argc >= 2) {
+            const char *str = argv[1];
+
+            if (!strcmp(str, "r")) {
+                rw = QEMU_PLUGIN_MEM_R;
+            } else if (!strcmp(str, "w")) {
+                rw = QEMU_PLUGIN_MEM_W;
+            }
+        }
+        if (!strcmp(argv[0], "inline")) {
+            do_inline = true;
+        }
+    }
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+    return 0;
+}
diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target
index 5a9a6faba4..3c7421a356 100644
--- a/tests/tcg/Makefile.target
+++ b/tests/tcg/Makefile.target
@@ -30,7 +30,7 @@
 #
 
 all:
--include ../../config-host.mak
+-include ../../../config-host.mak
 -include ../config-$(TARGET).mak
 
 # for including , in command strings
@@ -66,6 +66,8 @@ conditional-diff-out = 							\
 
 # Tests we are building
 TESTS=
+# additional tests which may re-use existing binaries
+EXTRA_TESTS=
 
 # Start with a blank slate, the build targets get to add stuff first
 CFLAGS=
@@ -109,7 +111,7 @@ else
 
 endif
 
-all: $(TESTS)
+all: $(TESTS) $(EXTRA_TESTS)
 
 #
 # Test Runners
@@ -121,11 +123,39 @@ all: $(TESTS)
 #
 
 RUN_TESTS=$(patsubst %,run-%, $(TESTS))
+
+# If plugins exist also include those in the tests
+ifeq ($(CONFIG_PLUGIN),y)
+PLUGIN_DIR=../../plugin
+VPATH+=$(PLUGIN_DIR)
+PLUGINS=$(notdir $(wildcard $(PLUGIN_DIR)/*.so))
+
+# We need to ensure expand the run-plugin-TEST-with-PLUGIN
+# pre-requistes manually here as we can't use stems to handle it. We
+# also add some special helpers the run-plugin- rules can use bellow.
+
+$(foreach p,$(PLUGINS), \
+	$(foreach t,$(TESTS),\
+		$(eval run-plugin-$(t)-with-$(p): $t $p) \
+		$(eval run-plugin-$(t)-with-$(p): TIMEOUT=30) \
+		$(eval RUN_TESTS+=run-plugin-$(t)-with-$(p))))
+endif
+
+strip-plugin = $(wordlist 1, 1, $(subst -with-, ,$1))
+extract-plugin = $(wordlist 2, 2, $(subst -with-, ,$1))
+
 RUN_TESTS+=$(EXTRA_RUNS)
 
 ifdef CONFIG_USER_ONLY
 run-%: %
 	$(call run-test, $<, $(QEMU) $(QEMU_OPTS) $<, "$< on $(TARGET_NAME)")
+
+run-plugin-%:
+	$(call run-test, $@, $(QEMU) $(QEMU_OPTS) \
+		-plugin $(PLUGIN_DIR)/$(call extract-plugin,$@) \
+		-d plugin -D $*.pout \
+		 $(call strip-plugin,$<), \
+	"$* on $(TARGET_NAME)")
 else
 run-%: %
 	$(call run-test, $<, \
@@ -133,6 +163,15 @@ run-%: %
 		  -chardev file$(COMMA)path=$<.out$(COMMA)id=output \
 	   	  $(QEMU_OPTS) $<, \
 	  "$< on $(TARGET_NAME)")
+
+run-plugin-%:
+	$(call run-test, $@, \
+	  $(QEMU) -monitor none -display none \
+		  -chardev file$(COMMA)path=$@.out$(COMMA)id=output \
+	   	  -plugin $(PLUGIN_DIR)/$(call extract-plugin,$@) \
+	    	  -d plugin -D $*.pout \
+	   	  $(QEMU_OPTS) $(call strip-plugin,$<), \
+	  "$* on $(TARGET_NAME)")
 endif
 
 gdb-%: %
diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
index b4b3957963..950dbb4bac 100644
--- a/tests/tcg/aarch64/Makefile.softmmu-target
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
@@ -52,4 +52,4 @@ run-memory-replay: memory-replay run-memory-record
 	   	  $(QEMU_OPTS) memory, \
 	  "$< on $(TARGET_NAME)")
 
-TESTS+=memory-record memory-replay
+EXTRA_TESTS+=memory-record memory-replay
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
index 509f1afa93..96d2321045 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -26,4 +26,10 @@ AARCH64_TESTS += semihosting
 run-semihosting: semihosting
 	$(call run-test,$<,$(QEMU) $< 2> $<.err, "$< on $(TARGET_NAME)")
 
+run-plugin-semihosting-with-%:
+	$(call run-test, $@, $(QEMU) $(QEMU_OPTS) \
+		-plugin $(PLUGIN_DIR)/$(call extract-plugin,$@) \
+		 $(call strip-plugin,$<) 2> $<.err, \
+		"$< on $(TARGET_NAME) with $*")
+
 TESTS += $(AARCH64_TESTS)
diff --git a/tests/tcg/arm/Makefile.softmmu-target b/tests/tcg/arm/Makefile.softmmu-target
index 231e9a57b4..3fe237ba39 100644
--- a/tests/tcg/arm/Makefile.softmmu-target
+++ b/tests/tcg/arm/Makefile.softmmu-target
@@ -23,3 +23,4 @@ LDFLAGS+=-nostdlib -N -static
 test-armv6m-undef: EXTRA_CFLAGS+=-mcpu=cortex-m0
 
 run-test-armv6m-undef: QEMU_OPTS+=-semihosting -M microbit -kernel
+run-plugin-test-armv6m-undef-%: QEMU_OPTS+=-semihosting -M microbit -kernel
diff --git a/tests/tcg/arm/Makefile.target b/tests/tcg/arm/Makefile.target
index 3ddff85240..0765f37ff0 100644
--- a/tests/tcg/arm/Makefile.target
+++ b/tests/tcg/arm/Makefile.target
@@ -34,6 +34,12 @@ ARM_TESTS += semihosting
 run-semihosting: semihosting
 	$(call run-test,$<,$(QEMU) $< 2> $<.err, "$< on $(TARGET_NAME)")
 
+run-plugin-semihosting-with-%:
+	$(call run-test, $@, $(QEMU) $(QEMU_OPTS) \
+		-plugin $(PLUGIN_DIR)/$(call extract-plugin,$@) \
+		 $(call strip-plugin,$<) 2> $<.err, \
+		"$< on $(TARGET_NAME) with $*")
+
 TESTS += $(ARM_TESTS)
 
 # On ARM Linux only supports 4k pages
diff --git a/tests/tcg/cris/Makefile.target b/tests/tcg/cris/Makefile.target
index c1173ead42..24c7f2e761 100644
--- a/tests/tcg/cris/Makefile.target
+++ b/tests/tcg/cris/Makefile.target
@@ -47,7 +47,7 @@ CRT_FILES = crt.o sys.o
 	$(CC) $(ASFLAGS) $< -o $@ $(LDFLAGS) $(NOSTDFLAGS) $(CRT_FILES)
 
 # The default CPU breaks (possibly as it's max?) so force crisv17
-$(CRIS_RUNS): QEMU_OPTS=-cpu crisv17
+QEMU_OPTS=-cpu crisv17
 
 # Additional runners to run under GNU SIM
 CRIS_RUNS_ON_SIM=$(patsubst %, %-on-sim, $(CRIS_RUNS))
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index 08c5736a4d..43ee2e181e 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -7,10 +7,8 @@ VPATH 		+= $(I386_SRC)
 
 I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
 ALL_X86_TESTS=$(I386_SRCS:.c=)
-I386_TESTS:=$(filter-out test-i386-ssse3, $(ALL_X86_TESTS))
+SKIP_I386_TESTS=test-i386-ssse3
 X86_64_TESTS:=$(filter test-i386-ssse3, $(ALL_X86_TESTS))
-# Update TESTS
-TESTS=$(MULTIARCH_TESTS) $(I386_TESTS)
 
 #
 # hello-i386 is a barebones app
@@ -36,9 +34,12 @@ run-test-i386-fprem: test-i386-fprem test-i386-fprem.ref
 	$(call run-test,test-i386-fprem, $(QEMU) $<,"$< on $(TARGET_NAME)")
 	$(call diff-out,test-i386-fprem, test-i386-fprem.ref)
 else
-run-test-i386-fprem: test-i386-fprem
-	$(call skip-test, $<, "SLOW")
+SKIP_I386_TESTS+=test-i386-fprem
 endif
 
+# Update TESTS
+I386_TESTS:=$(filter-out $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
+TESTS=$(MULTIARCH_TESTS) $(I386_TESTS)
+
 # On i386 and x86_64 Linux only supports 4k pages (large pages are a different hack)
 EXTRA_RUNS+=run-test-mmap-4096
diff --git a/trace-events b/trace-events
index 20821ba545..42107ebc69 100644
--- a/trace-events
+++ b/trace-events
@@ -149,15 +149,17 @@ vcpu guest_cpu_reset(void)
 # Access information can be parsed as:
 #
 # struct mem_info {
-#     uint8_t size_shift : 2; /* interpreted as "1 << size_shift" bytes */
+#     uint8_t size_shift : 4; /* interpreted as "1 << size_shift" bytes */
 #     bool    sign_extend: 1; /* sign-extended */
 #     uint8_t endianness : 1; /* 0: little, 1: big */
-#     bool    store      : 1; /* wheter it's a store operation */
+#     bool    store      : 1; /* whether it is a store operation */
+#             pad        : 1;
+#     uint8_t mmuidx     : 4; /* mmuidx (softmmu only)  */
 # };
 #
 # Mode: user, softmmu
 # Targets: TCG(all)
-vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
+vcpu tcg guest_mem_before(TCGv vaddr, uint16_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
 
 # linux-user/syscall.c
 # bsd-user/syscall.c
diff --git a/trace/mem-internal.h b/trace/mem-internal.h
index 3444fbc596..0a32aa22ca 100644
--- a/trace/mem-internal.h
+++ b/trace/mem-internal.h
@@ -10,15 +10,17 @@
 #ifndef TRACE__MEM_INTERNAL_H
 #define TRACE__MEM_INTERNAL_H
 
-#define TRACE_MEM_SZ_SHIFT_MASK 0x7 /* size shift mask */
-#define TRACE_MEM_SE (1ULL << 3)    /* sign extended (y/n) */
-#define TRACE_MEM_BE (1ULL << 4)    /* big endian (y/n) */
-#define TRACE_MEM_ST (1ULL << 5)    /* store (y/n) */
+#define TRACE_MEM_SZ_SHIFT_MASK 0xf /* size shift mask */
+#define TRACE_MEM_SE (1ULL << 4)    /* sign extended (y/n) */
+#define TRACE_MEM_BE (1ULL << 5)    /* big endian (y/n) */
+#define TRACE_MEM_ST (1ULL << 6)    /* store (y/n) */
+#define TRACE_MEM_MMU_SHIFT 8       /* mmu idx */
 
-static inline uint8_t trace_mem_build_info(
-    int size_shift, bool sign_extend, MemOp endianness, bool store)
+static inline uint16_t trace_mem_build_info(
+    int size_shift, bool sign_extend, MemOp endianness,
+    bool store, unsigned int mmu_idx)
 {
-    uint8_t res;
+    uint16_t res;
 
     res = size_shift & TRACE_MEM_SZ_SHIFT_MASK;
     if (sign_extend) {
@@ -30,25 +32,36 @@ static inline uint8_t trace_mem_build_info(
     if (store) {
         res |= TRACE_MEM_ST;
     }
+#ifdef CONFIG_SOFTMMU
+    res |= mmu_idx << TRACE_MEM_MMU_SHIFT;
+#endif
     return res;
 }
 
-static inline uint8_t trace_mem_get_info(MemOp op, bool store)
+static inline uint16_t trace_mem_get_info(MemOp op,
+                                          unsigned int mmu_idx,
+                                          bool store)
 {
     return trace_mem_build_info(op & MO_SIZE, !!(op & MO_SIGN),
-                                op & MO_BSWAP, store);
+                                op & MO_BSWAP, store,
+                                mmu_idx);
+}
+
+/* Used by the atomic helpers */
+static inline
+uint16_t trace_mem_build_info_no_se_be(int size_shift, bool store,
+                                       TCGMemOpIdx oi)
+{
+    return trace_mem_build_info(size_shift, false, MO_BE, store,
+                                get_mmuidx(oi));
 }
 
 static inline
-uint8_t trace_mem_build_info_no_se_be(int size_shift, bool store)
+uint16_t trace_mem_build_info_no_se_le(int size_shift, bool store,
+                                       TCGMemOpIdx oi)
 {
-    return trace_mem_build_info(size_shift, false, MO_BE, store);
-}
-
-static inline
-uint8_t trace_mem_build_info_no_se_le(int size_shift, bool store)
-{
-    return trace_mem_build_info(size_shift, false, MO_LE, store);
+    return trace_mem_build_info(size_shift, false, MO_LE, store,
+                                get_mmuidx(oi));
 }
 
 #endif /* TRACE__MEM_INTERNAL_H */
diff --git a/trace/mem.h b/trace/mem.h
index 8cf213d85b..9644f592b4 100644
--- a/trace/mem.h
+++ b/trace/mem.h
@@ -18,15 +18,16 @@
  *
  * Return a value for the 'info' argument in guest memory access traces.
  */
-static uint8_t trace_mem_get_info(MemOp op, bool store);
+static uint16_t trace_mem_get_info(MemOp op, unsigned int mmu_idx, bool store);
 
 /**
  * trace_mem_build_info:
  *
  * Return a value for the 'info' argument in guest memory access traces.
  */
-static uint8_t trace_mem_build_info(int size_shift, bool sign_extend,
-                                    MemOp endianness, bool store);
+static uint16_t trace_mem_build_info(int size_shift, bool sign_extend,
+                                     MemOp endianness, bool store,
+                                     unsigned int mmuidx);
 
 
 #include "trace/mem-internal.h"
diff --git a/util/log.c b/util/log.c
index 1d1b33f7d9..1ca13059ee 100644
--- a/util/log.c
+++ b/util/log.c
@@ -273,6 +273,9 @@ const QEMULogItem qemu_log_items[] = {
     { CPU_LOG_TB_NOCHAIN, "nochain",
       "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
       "complete traces" },
+#ifdef CONFIG_PLUGIN
+    { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins\n"},
+#endif
     { 0, NULL, NULL },
 };
 
diff --git a/vl.c b/vl.c
index c389d24b2c..6a65a64bfd 100644
--- a/vl.c
+++ b/vl.c
@@ -110,6 +110,7 @@ int main(int argc, char **argv)
 
 #include "trace-root.h"
 #include "trace/control.h"
+#include "qemu/plugin.h"
 #include "qemu/queue.h"
 #include "sysemu/arch_init.h"
 
@@ -2859,6 +2860,7 @@ int main(int argc, char **argv, char **envp)
     bool list_data_dirs = false;
     char *dir, **dirs;
     BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue);
+    QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list);
 
     os_set_line_buffering();
 
@@ -2889,6 +2891,7 @@ int main(int argc, char **argv, char **envp)
     qemu_add_opts(&qemu_global_opts);
     qemu_add_opts(&qemu_mon_opts);
     qemu_add_opts(&qemu_trace_opts);
+    qemu_plugin_add_opts();
     qemu_add_opts(&qemu_option_rom_opts);
     qemu_add_opts(&qemu_machine_opts);
     qemu_add_opts(&qemu_accel_opts);
@@ -3684,6 +3687,9 @@ int main(int argc, char **argv, char **envp)
                 g_free(trace_file);
                 trace_file = trace_opt_parse(optarg);
                 break;
+            case QEMU_OPTION_plugin:
+                qemu_plugin_opt_parse(optarg, &plugin_list);
+                break;
             case QEMU_OPTION_readconfig:
                 {
                     int ret = qemu_read_config_file(optarg);
@@ -3997,6 +4003,11 @@ int main(int argc, char **argv, char **envp)
                                machine_class->default_machine_opts, 0);
     }
 
+    /* process plugin before CPUs are created, but once -smp has been parsed */
+    if (qemu_plugin_load_list(&plugin_list)) {
+        exit(1);
+    }
+
     qemu_opts_foreach(qemu_find_opts("device"),
                       default_driver_check, NULL, NULL);
     qemu_opts_foreach(qemu_find_opts("global"),