diff --git a/gdb/gdbserver/ChangeLog b/gdb/gdbserver/ChangeLog
index 83a7848f83..d9ad6bb356 100644
--- a/gdb/gdbserver/ChangeLog
+++ b/gdb/gdbserver/ChangeLog
@@ -1,3 +1,70 @@
+2015-09-21  Pierre Langlois  <pierre.langlois@arm.com>
+
+	* Makefile.in (linux-aarch64-ipa.o, aarch64-ipa.o): New rules.
+	* configure.srv (aarch64*-*-linux*): Add linux-aarch64-ipa.o and
+	aarch64-ipa.o.
+	* linux-aarch64-ipa.c: New file.
+	* linux-aarch64-low.c: Include arch/aarch64-insn.h, inttypes.h
+	and endian.h.
+	(aarch64_get_thread_area): New target method.
+	(extract_signed_bitfield): New helper function.
+	(aarch64_decode_ldr_literal): New function.
+	(enum aarch64_opcodes): New enum.
+	(struct aarch64_register): New struct.
+	(struct aarch64_operand): New struct.
+	(x0): New static global.
+	(x1): Likewise.
+	(x2): Likewise.
+	(x3): Likewise.
+	(x4): Likewise.
+	(w2): Likewise.
+	(ip0): Likewise.
+	(sp): Likewise.
+	(xzr): Likewise.
+	(aarch64_register): New helper function.
+	(register_operand): Likewise.
+	(immediate_operand): Likewise.
+	(struct aarch64_memory_operand): New struct.
+	(offset_memory_operand): New helper function.
+	(preindex_memory_operand): Likewise.
+	(enum aarch64_system_control_registers): New enum.
+	(ENCODE): New macro.
+	(emit_insn): New helper function.
+	(emit_b): New function.
+	(emit_bcond): Likewise.
+	(emit_cb): Likewise.
+	(emit_tb): Likewise.
+	(emit_blr): Likewise.
+	(emit_stp): Likewise.
+	(emit_ldp_q_offset): Likewise.
+	(emit_stp_q_offset): Likewise.
+	(emit_load_store): Likewise.
+	(emit_ldr): Likewise.
+	(emit_ldrsw): Likewise.
+	(emit_str): Likewise.
+	(emit_load_store_exclusive): Likewise.
+	(emit_ldaxr): Likewise.
+	(emit_stxr): Likewise.
+	(emit_stlr): Likewise.
+	(emit_data_processing_reg): Likewise.
+	(emit_data_processing): Likewise.
+	(emit_add): Likewise.
+	(emit_sub): Likewise.
+	(emit_mov): Likewise.
+	(emit_movk): Likewise.
+	(emit_mov_addr): Likewise.
+	(emit_mrs): Likewise.
+	(emit_msr): Likewise.
+	(emit_sevl): Likewise.
+	(emit_wfe): Likewise.
+	(append_insns): Likewise.
+	(can_encode_int32): New helper function.
+	(aarch64_relocate_instruction): New function.
+	(aarch64_install_fast_tracepoint_jump_pad): Likewise.
+	(aarch64_get_min_fast_tracepoint_insn_len): Likewise.
+	(struct linux_target_ops): Install aarch64_get_thread_area,
+	aarch64_install_fast_tracepoint_jump_pad and
+	aarch64_get_min_fast_tracepoint_insn_len.
+
 2015-09-21  Pierre Langlois  <pierre.langlois@arm.com>
 
 	* Makefile.in (aarch64-insn.o): New rule.
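Review note, not part of the patch: every instruction word in this series is
assembled with the ENCODE macro defined in linux-aarch64-low.c below.  As a
quick sanity check of that scheme, here is a standalone C sketch that reuses
the patch's B and CBNZ constants to encode "CBNZ w2, #-16", the spin-lock
retry branch the jump pad emits.  The expected word 0x35ffff82 was worked out
by hand from the A64 encoding, so treat it as an assumption to verify against
an assembler.

/* Standalone sketch (not part of the patch): compose an A64 instruction
   word the way the patch's emit_* helpers do.  */
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Copied from the patch.  */
#define ENCODE(val, size, offset) \
  ((uint32_t) ((val & ((1ULL << size) - 1)) << offset))

enum { B = 0x14000000, CBNZ = 0x21000000 | B };

static uint32_t
encode_cbnz (unsigned rt, int is64, int32_t offset)
{
  /* sf (bit 31) selects the 64-bit variant; imm19 holds the
     byte offset divided by 4.  */
  return (CBNZ | ENCODE (is64, 1, 31) | ENCODE (offset >> 2, 19, 5)
	  | ENCODE (rt, 5, 0));
}

int
main (void)
{
  uint32_t insn = encode_cbnz (2, 0, -16);	/* CBNZ w2, #-16 */

  assert (insn == 0x35ffff82);
  printf ("CBNZ w2, #-16 -> 0x%" PRIx32 "\n", insn);
  return 0;
}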
diff --git a/gdb/gdbserver/Makefile.in b/gdb/gdbserver/Makefile.in
index d096663645..cd146f4abd 100644
--- a/gdb/gdbserver/Makefile.in
+++ b/gdb/gdbserver/Makefile.in
@@ -499,6 +499,12 @@ linux-amd64-ipa.o: linux-amd64-ipa.c
 amd64-linux-ipa.o: amd64-linux.c
 	$(IPAGENT_COMPILE) $<
 	$(POSTCOMPILE)
+linux-aarch64-ipa.o: linux-aarch64-ipa.c
+	$(IPAGENT_COMPILE) $<
+	$(POSTCOMPILE)
+aarch64-ipa.o: aarch64.c
+	$(IPAGENT_COMPILE) $<
+	$(POSTCOMPILE)
 tdesc-ipa.o: tdesc.c
 	$(IPAGENT_COMPILE) $<
 	$(POSTCOMPILE)
diff --git a/gdb/gdbserver/configure.srv b/gdb/gdbserver/configure.srv
index a62df83d29..f187c9de0e 100644
--- a/gdb/gdbserver/configure.srv
+++ b/gdb/gdbserver/configure.srv
@@ -62,6 +62,7 @@ case "${target}" in
 			srv_xmlfiles="${srv_xmlfiles} arm-with-neon.xml"
 			srv_linux_regsets=yes
 			srv_linux_thread_db=yes
+			ipa_obj="linux-aarch64-ipa.o aarch64-ipa.o"
 			;;
   arm*-*-linux*)	srv_regobj="reg-arm.o arm-with-iwmmxt.o"
 			srv_regobj="${srv_regobj} arm-with-vfpv2.o"
diff --git a/gdb/gdbserver/linux-aarch64-ipa.c b/gdb/gdbserver/linux-aarch64-ipa.c
new file mode 100644
index 0000000000..1aafc5f10a
--- /dev/null
+++ b/gdb/gdbserver/linux-aarch64-ipa.c
@@ -0,0 +1,151 @@
+/* GNU/Linux/AArch64 specific low level interface, for the in-process
+   agent library for GDB.
+
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "server.h"
+#include "tracepoint.h"
+
+/* Defined in auto-generated file aarch64.c.  */
+void init_registers_aarch64 (void);
+extern const struct target_desc *tdesc_aarch64;
+
+/* Each register saved by the jump pad is in a 16 byte cell.  */
+#define FT_CR_SIZE 16
+
+#define FT_CR_FPCR	0
+#define FT_CR_FPSR	1
+#define FT_CR_CPSR	2
+#define FT_CR_PC	3
+#define FT_CR_SP	4
+#define FT_CR_X0	5
+#define FT_CR_GPR(n)	(FT_CR_X0 + (n))
+#define FT_CR_FPR(n)	(FT_CR_GPR (31) + (n))
+
+/* Mapping between registers collected by the jump pad and GDB's register
+   array layout used by regcache.
+
+   See linux-aarch64-low.c (aarch64_install_fast_tracepoint_jump_pad) for
+   more details.  */
+
+static const int aarch64_ft_collect_regmap[] = {
+  FT_CR_GPR (0),
+  FT_CR_GPR (1),
+  FT_CR_GPR (2),
+  FT_CR_GPR (3),
+  FT_CR_GPR (4),
+  FT_CR_GPR (5),
+  FT_CR_GPR (6),
+  FT_CR_GPR (7),
+  FT_CR_GPR (8),
+  FT_CR_GPR (9),
+  FT_CR_GPR (10),
+  FT_CR_GPR (11),
+  FT_CR_GPR (12),
+  FT_CR_GPR (13),
+  FT_CR_GPR (14),
+  FT_CR_GPR (15),
+  FT_CR_GPR (16),
+  FT_CR_GPR (17),
+  FT_CR_GPR (18),
+  FT_CR_GPR (19),
+  FT_CR_GPR (20),
+  FT_CR_GPR (21),
+  FT_CR_GPR (22),
+  FT_CR_GPR (23),
+  FT_CR_GPR (24),
+  FT_CR_GPR (25),
+  FT_CR_GPR (26),
+  FT_CR_GPR (27),
+  FT_CR_GPR (28),
+  /* FP */
+  FT_CR_GPR (29),
+  /* LR */
+  FT_CR_GPR (30),
+  FT_CR_SP,
+  FT_CR_PC,
+  FT_CR_CPSR,
+  FT_CR_FPR (0),
+  FT_CR_FPR (1),
+  FT_CR_FPR (2),
+  FT_CR_FPR (3),
+  FT_CR_FPR (4),
+  FT_CR_FPR (5),
+  FT_CR_FPR (6),
+  FT_CR_FPR (7),
+  FT_CR_FPR (8),
+  FT_CR_FPR (9),
+  FT_CR_FPR (10),
+  FT_CR_FPR (11),
+  FT_CR_FPR (12),
+  FT_CR_FPR (13),
+  FT_CR_FPR (14),
+  FT_CR_FPR (15),
+  FT_CR_FPR (16),
+  FT_CR_FPR (17),
+  FT_CR_FPR (18),
+  FT_CR_FPR (19),
+  FT_CR_FPR (20),
+  FT_CR_FPR (21),
+  FT_CR_FPR (22),
+  FT_CR_FPR (23),
+  FT_CR_FPR (24),
+  FT_CR_FPR (25),
+  FT_CR_FPR (26),
+  FT_CR_FPR (27),
+  FT_CR_FPR (28),
+  FT_CR_FPR (29),
+  FT_CR_FPR (30),
+  FT_CR_FPR (31),
+  FT_CR_FPSR,
+  FT_CR_FPCR
+};
+
+#define AARCH64_NUM_FT_COLLECT_GREGS \
+  (sizeof (aarch64_ft_collect_regmap) / sizeof (aarch64_ft_collect_regmap[0]))
+
+/* Fill in REGCACHE with registers saved by the jump pad in BUF.  */
+
+void
+supply_fast_tracepoint_registers (struct regcache *regcache,
+				  const unsigned char *buf)
+{
+  int i;
+
+  for (i = 0; i < AARCH64_NUM_FT_COLLECT_GREGS; i++)
+    supply_register (regcache, i,
+		     ((char *) buf)
+		     + (aarch64_ft_collect_regmap[i] * FT_CR_SIZE));
+}
+
+IP_AGENT_EXPORT_FUNC ULONGEST
+gdb_agent_get_raw_reg (const unsigned char *raw_regs, int regnum)
+{
+  if (regnum >= AARCH64_NUM_FT_COLLECT_GREGS)
+    return 0;
+
+  return *(ULONGEST *) (raw_regs
+			+ aarch64_ft_collect_regmap[regnum] * FT_CR_SIZE);
+}
+
+void
+initialize_low_tracepoint (void)
+{
+  init_registers_aarch64 ();
+  ipa_tdesc = tdesc_aarch64;
+}
diff --git a/gdb/gdbserver/linux-aarch64-low.c b/gdb/gdbserver/linux-aarch64-low.c
index 0ba58ddcb8..8e007052e9 100644
--- a/gdb/gdbserver/linux-aarch64-low.c
+++ b/gdb/gdbserver/linux-aarch64-low.c
@@ -23,6 +23,7 @@
 #include "linux-low.h"
 #include "nat/aarch64-linux.h"
 #include "nat/aarch64-linux-hw-point.h"
+#include "arch/aarch64-insn.h"
 #include "linux-aarch32-low.h"
 #include "elf/common.h"
 
@@ -30,6 +31,9 @@
 #include <sys/user.h>
 #include "nat/gdb_ptrace.h"
 #include <asm/ptrace.h>
+#include <inttypes.h>
+#include <endian.h>
+#include <sys/uio.h>
 
 #include "gdb_proc_service.h"
@@ -559,6 +563,1463 @@ aarch64_supports_tracepoints (void)
     }
 }
 
+/* Implementation of linux_target_ops method "get_thread_area".  */
+
+static int
+aarch64_get_thread_area (int lwpid, CORE_ADDR *addrp)
+{
+  struct iovec iovec;
+  uint64_t reg;
+
+  iovec.iov_base = &reg;
+  iovec.iov_len = sizeof (reg);
+
+  if (ptrace (PTRACE_GETREGSET, lwpid, NT_ARM_TLS, &iovec) != 0)
+    return -1;
+
+  *addrp = reg;
+
+  return 0;
+}
+
+/* Extract a signed value from a bit field within an instruction
+   encoding.
+
+   INSN is the instruction opcode.
+
+   WIDTH specifies the width of the bit field to extract (in bits).
+
+   OFFSET specifies the least significant bit of the field where bits
+   are numbered zero counting from least to most significant.  */
+
+static int32_t
+extract_signed_bitfield (uint32_t insn, unsigned width, unsigned offset)
+{
+  unsigned shift_l = sizeof (int32_t) * 8 - (offset + width);
+  unsigned shift_r = sizeof (int32_t) * 8 - width;
+
+  return ((int32_t) insn << shift_l) >> shift_r;
+}
+
+/* Decode an opcode if it represents an LDR or LDRSW instruction taking a
+   literal offset from the current PC.
+
+   ADDR specifies the address of the opcode.
+   INSN specifies the opcode to test.
+   IS_W is set if the instruction is LDRSW.
+   IS64 receives the size field from the decoded instruction.
+   RT receives the 'rt' field from the decoded instruction.
+   OFFSET receives the 'imm' field from the decoded instruction.
+
+   Return 1 if the opcode matches and is decoded, otherwise 0.  */
+
+int
+aarch64_decode_ldr_literal (CORE_ADDR addr, uint32_t insn, int *is_w,
+			    int *is64, unsigned *rt, int32_t *offset)
+{
+  /* LDR    0T01 1000 iiii iiii iiii iiii iiir rrrr */
+  /* LDRSW  1001 1000 iiii iiii iiii iiii iiir rrrr */
+  if ((insn & 0x3f000000) == 0x18000000)
+    {
+      *is_w = (insn >> 31) & 0x1;
+
+      if (*is_w)
+	{
+	  /* LDRSW always takes a 64-bit destination register.  */
+	  *is64 = 1;
+	}
+      else
+	*is64 = (insn >> 30) & 0x1;
+
+      *rt = (insn >> 0) & 0x1f;
+      *offset = extract_signed_bitfield (insn, 19, 5) << 2;
+
+      if (aarch64_debug)
+	debug_printf ("decode: %s 0x%x %s %s%u, #?\n",
+		      core_addr_to_string_nz (addr), insn,
+		      *is_w ? "ldrsw" : "ldr",
+		      *is64 ? "x" : "w", *rt);
+
+      return 1;
+    }
+
+  return 0;
+}
+
+/* List of opcodes that we need for building the jump pad and relocating
+   an instruction.  */
+
+enum aarch64_opcodes
+{
+  /* B              0001 01ii iiii iiii iiii iiii iiii iiii */
+  /* BL             1001 01ii iiii iiii iiii iiii iiii iiii */
+  /* B.COND         0101 0100 iiii iiii iiii iiii iii0 cccc */
+  /* CBZ            s011 0100 iiii iiii iiii iiii iiir rrrr */
+  /* CBNZ           s011 0101 iiii iiii iiii iiii iiir rrrr */
+  /* TBZ            b011 0110 bbbb biii iiii iiii iiir rrrr */
+  /* TBNZ           b011 0111 bbbb biii iiii iiii iiir rrrr */
+  B               = 0x14000000,
+  BL              = 0x80000000 | B,
+  BCOND           = 0x40000000 | B,
+  CBZ             = 0x20000000 | B,
+  CBNZ            = 0x21000000 | B,
+  TBZ             = 0x36000000 | B,
+  TBNZ            = 0x37000000 | B,
+  /* BLR            1101 0110 0011 1111 0000 00rr rrr0 0000 */
+  BLR             = 0xd63f0000,
+  /* STP            s010 100o o0ii iiii irrr rrrr rrrr rrrr */
+  /* LDP            s010 100o o1ii iiii irrr rrrr rrrr rrrr */
+  /* STP (SIMD&VFP) ss10 110o o0ii iiii irrr rrrr rrrr rrrr */
+  /* LDP (SIMD&VFP) ss10 110o o1ii iiii irrr rrrr rrrr rrrr */
+  STP             = 0x28000000,
+  LDP             = 0x28400000,
+  STP_SIMD_VFP    = 0x04000000 | STP,
+  LDP_SIMD_VFP    = 0x04000000 | LDP,
+  /* STR            ss11 100o 00xi iiii iiii xxrr rrrr rrrr */
+  /* LDR            ss11 100o 01xi iiii iiii xxrr rrrr rrrr */
+  /* LDRSW          1011 100o 10xi iiii iiii xxrr rrrr rrrr */
+  STR             = 0x38000000,
+  LDR             = 0x00400000 | STR,
+  LDRSW           = 0x80800000 | STR,
+  /* LDAXR          ss00 1000 0101 1111 1111 11rr rrrr rrrr */
+  LDAXR           = 0x085ffc00,
+  /* STXR           ss00 1000 000r rrrr 0111 11rr rrrr rrrr */
+  STXR            = 0x08007c00,
+  /* STLR           ss00 1000 1001 1111 1111 11rr rrrr rrrr */
+  STLR            = 0x089ffc00,
+  /* MOV            s101 0010 1xxi iiii iiii iiii iiir rrrr */
+  /* MOVK           s111 0010 1xxi iiii iiii iiii iiir rrrr */
+  MOV             = 0x52800000,
+  MOVK            = 0x20000000 | MOV,
+  /* ADD            s00o ooo1 xxxx xxxx xxxx xxxx xxxx xxxx */
+  /* SUB            s10o ooo1 xxxx xxxx xxxx xxxx xxxx xxxx */
+  /* SUBS           s11o ooo1 xxxx xxxx xxxx xxxx xxxx xxxx */
+  ADD             = 0x01000000,
+  SUB             = 0x40000000 | ADD,
+  /* MSR (register) 1101 0101 0001 oooo oooo oooo ooor rrrr */
+  /* MRS            1101 0101 0011 oooo oooo oooo ooor rrrr */
+  MSR             = 0xd5100000,
+  MRS             = 0x00200000 | MSR,
+  /* HINT           1101 0101 0000 0011 0010 oooo ooo1 1111 */
+  HINT            = 0xd503201f,
+  SEVL            = (5 << 5) | HINT,
+  WFE             = (2 << 5) | HINT,
+};
+
+/* Representation of a general purpose register of the form xN or wN.
+
+   This type is used by emitting functions that take registers as
+   operands.  */
+
+struct aarch64_register
+{
+  unsigned num;
+  int is64;
+};
+
+/* Representation of an operand.  At this time, it only supports register
+   and immediate types.  */
+
+struct aarch64_operand
+{
+  /* Type of the operand.  */
+  enum
+    {
+      OPERAND_IMMEDIATE,
+      OPERAND_REGISTER,
+    } type;
+  /* Value of the operand according to the type.  */
+  union
+    {
+      uint32_t imm;
+      struct aarch64_register reg;
+    };
+};
+
+/* List of registers that we are currently using, we can add more here as
+   we need to use them.  */
+
+/* General purpose scratch registers (64 bit).  */
+static const struct aarch64_register x0 = { 0, 1 };
+static const struct aarch64_register x1 = { 1, 1 };
+static const struct aarch64_register x2 = { 2, 1 };
+static const struct aarch64_register x3 = { 3, 1 };
+static const struct aarch64_register x4 = { 4, 1 };
+
+/* General purpose scratch registers (32 bit).  */
+static const struct aarch64_register w2 = { 2, 0 };
+
+/* Intra-procedure scratch registers.  */
+static const struct aarch64_register ip0 = { 16, 1 };
+
+/* Special purpose registers.  */
+static const struct aarch64_register sp = { 31, 1 };
+static const struct aarch64_register xzr = { 31, 1 };
+
+/* Dynamically allocate a new register.  If we know the register
+   statically, we should make it a global as above instead of using this
+   helper function.  */
+
+static struct aarch64_register
+aarch64_register (unsigned num, int is64)
+{
+  return (struct aarch64_register) { num, is64 };
+}
+
+/* Helper function to create a register operand, for instructions with
+   different types of operands.
+
+   For example:
+   p += emit_mov (p, x0, register_operand (x1));  */
+
+static struct aarch64_operand
+register_operand (struct aarch64_register reg)
+{
+  struct aarch64_operand operand;
+
+  operand.type = OPERAND_REGISTER;
+  operand.reg = reg;
+
+  return operand;
+}
+
+/* Helper function to create an immediate operand, for instructions with
+   different types of operands.
+
+   For example:
+   p += emit_mov (p, x0, immediate_operand (12));  */
+
+static struct aarch64_operand
+immediate_operand (uint32_t imm)
+{
+  struct aarch64_operand operand;
+
+  operand.type = OPERAND_IMMEDIATE;
+  operand.imm = imm;
+
+  return operand;
+}
+
+/* Representation of a memory operand, used for load and store
+   instructions.
+
+   The types correspond to the following variants:
+
+   MEMORY_OPERAND_OFFSET:   LDR rt, [rn, #offset]
+   MEMORY_OPERAND_PREINDEX: LDR rt, [rn, #index]!  */
+
+struct aarch64_memory_operand
+{
+  /* Type of the operand.  */
+  enum
+    {
+      MEMORY_OPERAND_OFFSET,
+      MEMORY_OPERAND_PREINDEX,
+    } type;
+  /* Index from the base register.  */
+  int32_t index;
+};
+
+/* Helper function to create an offset memory operand.
+
+   For example:
+   p += emit_ldr (p, x0, sp, offset_memory_operand (16));  */
+
+static struct aarch64_memory_operand
+offset_memory_operand (int32_t offset)
+{
+  return (struct aarch64_memory_operand) { MEMORY_OPERAND_OFFSET, offset };
+}
+
+/* Helper function to create a pre-index memory operand.
+
+   For example:
+   p += emit_ldr (p, x0, sp, preindex_memory_operand (16));  */
+
+static struct aarch64_memory_operand
+preindex_memory_operand (int32_t index)
+{
+  return (struct aarch64_memory_operand) { MEMORY_OPERAND_PREINDEX, index };
+}
+
+/* System control registers.
+   These special registers can be written and read with the MRS and MSR
+   instructions.
+
+   - NZCV: Condition flags.  GDB refers to this register under the CPSR
+     name.
+   - FPSR: Floating-point status register.
+   - FPCR: Floating-point control register.
+   - TPIDR_EL0: Software thread ID register.  */
+
+enum aarch64_system_control_registers
+{
+  /*          op0           op1           crn          crm          op2  */
+  NZCV      = (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x2 << 3) | 0x0,
+  FPSR      = (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x4 << 3) | 0x1,
+  FPCR      = (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x4 << 3) | 0x0,
+  TPIDR_EL0 = (0x1 << 14) | (0x3 << 11) | (0xd << 7) | (0x0 << 3) | 0x2
+};
+
+/* Helper macro to mask and shift a value into a bitfield.  */
+
+#define ENCODE(val, size, offset) \
+  ((uint32_t) ((val & ((1ULL << size) - 1)) << offset))
+
+/* Write a 32-bit unsigned integer INSN into *BUF.  Return the number of
+   instructions written (i.e. 1).  */
+
+static int
+emit_insn (uint32_t *buf, uint32_t insn)
+{
+  *buf = insn;
+  return 1;
+}
+
+/* Write a B or BL instruction into *BUF.
+
+     B  #offset
+     BL #offset
+
+   IS_BL specifies if the link register should be updated.
+   OFFSET is the immediate offset from the current PC.  It is
+   byte-addressed but should be 4 bytes aligned.  It has a limited range of
+   +/- 128MB (26 bits << 2).  */
+
+static int
+emit_b (uint32_t *buf, int is_bl, int32_t offset)
+{
+  uint32_t imm26 = ENCODE (offset >> 2, 26, 0);
+
+  if (is_bl)
+    return emit_insn (buf, BL | imm26);
+  else
+    return emit_insn (buf, B | imm26);
+}
+
+/* Write a BCOND instruction into *BUF.
+
+     B.COND #offset
+
+   COND specifies the condition field.
+   OFFSET is the immediate offset from the current PC.  It is
+   byte-addressed but should be 4 bytes aligned.  It has a limited range of
+   +/- 1MB (19 bits << 2).  */
+
+static int
+emit_bcond (uint32_t *buf, unsigned cond, int32_t offset)
+{
+  return emit_insn (buf, BCOND | ENCODE (offset >> 2, 19, 5)
+		    | ENCODE (cond, 4, 0));
+}
+
+/* Write a CBZ or CBNZ instruction into *BUF.
+
+     CBZ  rt, #offset
+     CBNZ rt, #offset
+
+   IS_CBNZ distinguishes between CBZ and CBNZ instructions.
+   RT is the register to test.
+   OFFSET is the immediate offset from the current PC.  It is
+   byte-addressed but should be 4 bytes aligned.  It has a limited range of
+   +/- 1MB (19 bits << 2).  */
+
+static int
+emit_cb (uint32_t *buf, int is_cbnz, struct aarch64_register rt,
+	 int32_t offset)
+{
+  uint32_t imm19 = ENCODE (offset >> 2, 19, 5);
+  uint32_t sf = ENCODE (rt.is64, 1, 31);
+
+  if (is_cbnz)
+    return emit_insn (buf, CBNZ | sf | imm19 | ENCODE (rt.num, 5, 0));
+  else
+    return emit_insn (buf, CBZ | sf | imm19 | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a TBZ or TBNZ instruction into *BUF.
+
+     TBZ  rt, #bit, #offset
+     TBNZ rt, #bit, #offset
+
+   IS_TBNZ distinguishes between TBZ and TBNZ instructions.
+   RT is the register to test.
+   BIT is the index of the bit to test in register RT.
+   OFFSET is the immediate offset from the current PC.  It is
+   byte-addressed but should be 4 bytes aligned.  It has a limited range of
+   +/- 32KB (14 bits << 2).  */
+
+static int
+emit_tb (uint32_t *buf, int is_tbnz, unsigned bit,
+	 struct aarch64_register rt, int32_t offset)
+{
+  uint32_t imm14 = ENCODE (offset >> 2, 14, 5);
+  uint32_t b40 = ENCODE (bit, 5, 19);
+  uint32_t b5 = ENCODE (bit >> 5, 1, 31);
+
+  if (is_tbnz)
+    return emit_insn (buf, TBNZ | b5 | b40 | imm14 | ENCODE (rt.num, 5, 0));
+  else
+    return emit_insn (buf, TBZ | b5 | b40 | imm14 | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a BLR instruction into *BUF.
+
+     BLR rn
+
+   RN is the register to branch to.  */
+
+static int
+emit_blr (uint32_t *buf, struct aarch64_register rn)
+{
+  return emit_insn (buf, BLR | ENCODE (rn.num, 5, 5));
+}
+
+/* Write a STP instruction into *BUF.
+
+     STP rt, rt2, [rn, #offset]
+     STP rt, rt2, [rn, #index]!
+
+   RT and RT2 are the registers to store.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to a
+   -512 .. 504 range (7 bits << 3).  */
+
+static int
+emit_stp (uint32_t *buf, struct aarch64_register rt,
+	  struct aarch64_register rt2, struct aarch64_register rn,
+	  struct aarch64_memory_operand operand)
+{
+  uint32_t opc;
+  uint32_t pre_index;
+  uint32_t write_back;
+
+  if (rt.is64)
+    opc = ENCODE (2, 2, 30);
+  else
+    opc = ENCODE (0, 2, 30);
+
+  switch (operand.type)
+    {
+    case MEMORY_OPERAND_OFFSET:
+      {
+	pre_index = ENCODE (1, 1, 24);
+	write_back = ENCODE (0, 1, 23);
+	break;
+      }
+    case MEMORY_OPERAND_PREINDEX:
+      {
+	pre_index = ENCODE (1, 1, 24);
+	write_back = ENCODE (1, 1, 23);
+	break;
+      }
+    default:
+      return 0;
+    }
+
+  return emit_insn (buf, STP | opc | pre_index | write_back
+		    | ENCODE (operand.index >> 3, 7, 15)
+		    | ENCODE (rt2.num, 5, 10)
+		    | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a LDP (SIMD&VFP) instruction using Q registers into *BUF.
+
+     LDP qt, qt2, [rn, #offset]
+
+   RT and RT2 are the Q registers to load.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   a -1024 .. 1008 range (7 bits << 4).  */
+
+static int
+emit_ldp_q_offset (uint32_t *buf, unsigned rt, unsigned rt2,
+		   struct aarch64_register rn, int32_t offset)
+{
+  uint32_t opc = ENCODE (2, 2, 30);
+  uint32_t pre_index = ENCODE (1, 1, 24);
+
+  return emit_insn (buf, LDP_SIMD_VFP | opc | pre_index
+		    | ENCODE (offset >> 4, 7, 15) | ENCODE (rt2, 5, 10)
+		    | ENCODE (rn.num, 5, 5) | ENCODE (rt, 5, 0));
+}
+
+/* Write a STP (SIMD&VFP) instruction using Q registers into *BUF.
+
+     STP qt, qt2, [rn, #offset]
+
+   RT and RT2 are the Q registers to store.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   a -1024 .. 1008 range (7 bits << 4).  */
+
+static int
+emit_stp_q_offset (uint32_t *buf, unsigned rt, unsigned rt2,
+		   struct aarch64_register rn, int32_t offset)
+{
+  uint32_t opc = ENCODE (2, 2, 30);
+  uint32_t pre_index = ENCODE (1, 1, 24);
+
+  return emit_insn (buf, STP_SIMD_VFP | opc | pre_index
+		    | ENCODE (offset >> 4, 7, 15) | ENCODE (rt2, 5, 10)
+		    | ENCODE (rn.num, 5, 5) | ENCODE (rt, 5, 0));
+}
+
+/* Helper function emitting a load or store instruction.  */
+
+static int
+emit_load_store (uint32_t *buf, uint32_t size, enum aarch64_opcodes opcode,
+		 struct aarch64_register rt, struct aarch64_register rn,
+		 struct aarch64_memory_operand operand)
+{
+  uint32_t op;
+
+  switch (operand.type)
+    {
+    case MEMORY_OPERAND_OFFSET:
+      {
+	op = ENCODE (1, 1, 24);
+
+	return emit_insn (buf, opcode | ENCODE (size, 2, 30) | op
+			  | ENCODE (operand.index >> 3, 12, 10)
+			  | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
+      }
+    case MEMORY_OPERAND_PREINDEX:
+      {
+	uint32_t pre_index = ENCODE (3, 2, 10);
+
+	op = ENCODE (0, 1, 24);
+
+	return emit_insn (buf, opcode | ENCODE (size, 2, 30) | op
+			  | pre_index | ENCODE (operand.index, 9, 12)
+			  | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
+      }
+    default:
+      return 0;
+    }
+}
+
+/* Write a LDR instruction into *BUF.
+
+     LDR rt, [rn, #offset]
+     LDR rt, [rn, #index]!
+
+   RT is the register to load.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   a 0 .. 32760 range (12 bits << 3).  */
+
+static int
+emit_ldr (uint32_t *buf, struct aarch64_register rt,
+	  struct aarch64_register rn, struct aarch64_memory_operand operand)
+{
+  return emit_load_store (buf, rt.is64 ? 3 : 2, LDR, rt, rn, operand);
+}
+
+/* Write a LDRSW instruction into *BUF.  The register size is 64-bit.
+
+     LDRSW xt, [rn, #offset]
+     LDRSW xt, [rn, #index]!
+
+   RT is the register to load.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   a 0 .. 16380 range (12 bits << 2).  */
+
+static int
+emit_ldrsw (uint32_t *buf, struct aarch64_register rt,
+	    struct aarch64_register rn,
+	    struct aarch64_memory_operand operand)
+{
+  return emit_load_store (buf, 3, LDRSW, rt, rn, operand);
+}
+
+/* Write a STR instruction into *BUF.
+
+     STR rt, [rn, #offset]
+     STR rt, [rn, #index]!
+
+   RT is the register to store.
+   RN is the base address register.
+   OFFSET is the immediate to add to the base address.  It is limited to
+   a 0 .. 32760 range (12 bits << 3).  */
+
+static int
+emit_str (uint32_t *buf, struct aarch64_register rt,
+	  struct aarch64_register rn,
+	  struct aarch64_memory_operand operand)
+{
+  return emit_load_store (buf, rt.is64 ? 3 : 2, STR, rt, rn, operand);
+}
+
+/* Helper function emitting an exclusive load or store instruction.  */
+
+static int
+emit_load_store_exclusive (uint32_t *buf, uint32_t size,
+			   enum aarch64_opcodes opcode,
+			   struct aarch64_register rs,
+			   struct aarch64_register rt,
+			   struct aarch64_register rt2,
+			   struct aarch64_register rn)
+{
+  return emit_insn (buf, opcode | ENCODE (size, 2, 30)
+		    | ENCODE (rs.num, 5, 16) | ENCODE (rt2.num, 5, 10)
+		    | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a LDAXR instruction into *BUF.
+
+     LDAXR rt, [xn]
+
+   RT is the destination register.
+   RN is the base address register.  */
+
+static int
+emit_ldaxr (uint32_t *buf, struct aarch64_register rt,
+	    struct aarch64_register rn)
+{
+  return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, LDAXR, xzr, rt,
+				    xzr, rn);
+}
+
+/* Write a STXR instruction into *BUF.
+
+     STXR ws, rt, [xn]
+
+   RS is the result register; it indicates if the store succeeded or not.
+   RT is the destination register.
+   RN is the base address register.  */
+
+static int
+emit_stxr (uint32_t *buf, struct aarch64_register rs,
+	   struct aarch64_register rt, struct aarch64_register rn)
+{
+  return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, STXR, rs, rt,
+				    xzr, rn);
+}
+
+/* Write a STLR instruction into *BUF.
+
+     STLR rt, [xn]
+
+   RT is the register to store.
+   RN is the base address register.  */
+
+static int
+emit_stlr (uint32_t *buf, struct aarch64_register rt,
+	   struct aarch64_register rn)
+{
+  return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, STLR, xzr, rt,
+				    xzr, rn);
+}
+
+/* Helper function for data processing instructions with register
+   sources.  */
+
+static int
+emit_data_processing_reg (uint32_t *buf, enum aarch64_opcodes opcode,
+			  struct aarch64_register rd,
+			  struct aarch64_register rn,
+			  struct aarch64_register rm)
+{
+  uint32_t size = ENCODE (rd.is64, 1, 31);
+
+  return emit_insn (buf, opcode | size | ENCODE (rm.num, 5, 16)
+		    | ENCODE (rn.num, 5, 5) | ENCODE (rd.num, 5, 0));
+}
+
+/* Helper function for data processing instructions taking either a
+   register or an immediate.  */
+
+static int
+emit_data_processing (uint32_t *buf, enum aarch64_opcodes opcode,
+		      struct aarch64_register rd,
+		      struct aarch64_register rn,
+		      struct aarch64_operand operand)
+{
+  uint32_t size = ENCODE (rd.is64, 1, 31);
+  /* The opcode is different for register and immediate source operands.  */
+  uint32_t operand_opcode;
+
+  if (operand.type == OPERAND_IMMEDIATE)
+    {
+      /* xxx1 000x xxxx xxxx xxxx xxxx xxxx xxxx */
+      operand_opcode = ENCODE (8, 4, 25);
+
+      return emit_insn (buf, opcode | operand_opcode | size
+			| ENCODE (operand.imm, 12, 10)
+			| ENCODE (rn.num, 5, 5) | ENCODE (rd.num, 5, 0));
+    }
+  else
+    {
+      /* xxx0 101x xxxx xxxx xxxx xxxx xxxx xxxx */
+      operand_opcode = ENCODE (5, 4, 25);
+
+      return emit_data_processing_reg (buf, opcode | operand_opcode, rd,
+				       rn, operand.reg);
+    }
+}
+
+/* Write an ADD instruction into *BUF.
+
+     ADD rd, rn, #imm
+     ADD rd, rn, rm
+
+   This function handles both an immediate and register add.
+
+   RD is the destination register.
+   RN is the input register.
+   OPERAND is the source operand, either of type OPERAND_IMMEDIATE or
+   OPERAND_REGISTER.  */
+
+static int
+emit_add (uint32_t *buf, struct aarch64_register rd,
+	  struct aarch64_register rn, struct aarch64_operand operand)
+{
+  return emit_data_processing (buf, ADD, rd, rn, operand);
+}
+
+/* Write a SUB instruction into *BUF.
+
+     SUB rd, rn, #imm
+     SUB rd, rn, rm
+
+   This function handles both an immediate and register sub.
+
+   RD is the destination register.
+   RN is the input register.
+   OPERAND is the source operand to subtract from RN, either of type
+   OPERAND_IMMEDIATE or OPERAND_REGISTER.  */
+
+static int
+emit_sub (uint32_t *buf, struct aarch64_register rd,
+	  struct aarch64_register rn, struct aarch64_operand operand)
+{
+  return emit_data_processing (buf, SUB, rd, rn, operand);
+}
+
+/* Write a MOV instruction into *BUF.
+
+     MOV rd, #imm
+     MOV rd, rm
+
+   This function handles both a wide immediate move and a register move,
+   with the condition that the source register is not xzr.  xzr and the
+   stack pointer share the same encoding and this function only supports
+   the stack pointer.
+
+   RD is the destination register.
+   OPERAND is the source operand, either of type OPERAND_IMMEDIATE or
+   OPERAND_REGISTER.  */
+
+static int
+emit_mov (uint32_t *buf, struct aarch64_register rd,
+	  struct aarch64_operand operand)
+{
+  if (operand.type == OPERAND_IMMEDIATE)
+    {
+      uint32_t size = ENCODE (rd.is64, 1, 31);
+      /* Do not shift the immediate.  */
+      uint32_t shift = ENCODE (0, 2, 21);
+
+      return emit_insn (buf, MOV | size | shift
+			| ENCODE (operand.imm, 16, 5)
+			| ENCODE (rd.num, 5, 0));
+    }
+  else
+    return emit_add (buf, rd, operand.reg, immediate_operand (0));
+}
+
+/* Write a MOVK instruction into *BUF.
+
+     MOVK rd, #imm, lsl #shift
+
+   RD is the destination register.
+   IMM is the immediate.
+   SHIFT is the logical shift left to apply to IMM.  */
+
+static int
+emit_movk (uint32_t *buf, struct aarch64_register rd, uint32_t imm,
+	   unsigned shift)
+{
+  uint32_t size = ENCODE (rd.is64, 1, 31);
+
+  return emit_insn (buf, MOVK | size | ENCODE (shift, 2, 21)
+		    | ENCODE (imm, 16, 5) | ENCODE (rd.num, 5, 0));
+}
+
+/* Write instructions into *BUF in order to move ADDR into a register.
+   ADDR can be a 64-bit value.
+
+   This function will emit a series of MOV and MOVK instructions, such as:
+
+     MOV  xd, #(addr)
+     MOVK xd, #(addr >> 16), lsl #16
+     MOVK xd, #(addr >> 32), lsl #32
+     MOVK xd, #(addr >> 48), lsl #48  */
+
+static int
+emit_mov_addr (uint32_t *buf, struct aarch64_register rd, CORE_ADDR addr)
+{
+  uint32_t *p = buf;
+
+  /* The MOV (wide immediate) instruction clears the top bits of the
+     register.  */
+  p += emit_mov (p, rd, immediate_operand (addr & 0xffff));
+
+  if ((addr >> 16) != 0)
+    p += emit_movk (p, rd, (addr >> 16) & 0xffff, 1);
+  else
+    return p - buf;
+
+  if ((addr >> 32) != 0)
+    p += emit_movk (p, rd, (addr >> 32) & 0xffff, 2);
+  else
+    return p - buf;
+
+  if ((addr >> 48) != 0)
+    p += emit_movk (p, rd, (addr >> 48) & 0xffff, 3);
+
+  return p - buf;
+}
+
+/* Write a MRS instruction into *BUF.  The register size is 64-bit.
+
+     MRS xt, system_reg
+
+   RT is the destination register.
+   SYSTEM_REG is the special purpose register to read.  */
+
+static int
+emit_mrs (uint32_t *buf, struct aarch64_register rt,
+	  enum aarch64_system_control_registers system_reg)
+{
+  return emit_insn (buf, MRS | ENCODE (system_reg, 15, 5)
+		    | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a MSR instruction into *BUF.  The register size is 64-bit.
+
+     MSR system_reg, xt
+
+   SYSTEM_REG is the special purpose register to write.
+   RT is the input register.  */
+
+static int
+emit_msr (uint32_t *buf, enum aarch64_system_control_registers system_reg,
+	  struct aarch64_register rt)
+{
+  return emit_insn (buf, MSR | ENCODE (system_reg, 15, 5)
+		    | ENCODE (rt.num, 5, 0));
+}
+
+/* Write a SEVL instruction into *BUF.
+
+   This is a hint instruction telling the hardware to trigger an event.  */
+
+static int
+emit_sevl (uint32_t *buf)
+{
+  return emit_insn (buf, SEVL);
+}
+
+/* Write a WFE instruction into *BUF.
+
+   This is a hint instruction telling the hardware to wait for an
+   event.  */
+
+static int
+emit_wfe (uint32_t *buf)
+{
+  return emit_insn (buf, WFE);
+}
+
+/* Write LEN instructions from BUF into the inferior memory at *TO.
+
+   Note instructions are always little endian on AArch64, unlike data.  */
+
+static void
+append_insns (CORE_ADDR *to, size_t len, const uint32_t *buf)
+{
+  size_t byte_len = len * sizeof (uint32_t);
+#if (__BYTE_ORDER == __BIG_ENDIAN)
+  uint32_t *le_buf = xmalloc (byte_len);
+  size_t i;
+
+  for (i = 0; i < len; i++)
+    le_buf[i] = htole32 (buf[i]);
+
+  write_inferior_memory (*to, (const unsigned char *) le_buf, byte_len);
+
+  xfree (le_buf);
+#else
+  write_inferior_memory (*to, (const unsigned char *) buf, byte_len);
+#endif
+
+  *to += byte_len;
+}
+
+/* Helper function.  Return 1 if VAL can be encoded in BITS bits.  */
+
+static int
+can_encode_int32 (int32_t val, unsigned bits)
+{
+  /* This must be an arithmetic shift.  */
+  int32_t rest = val >> bits;
+
+  return rest == 0 || rest == -1;
+}
+
+/* Relocate an instruction from OLDLOC to *TO.  This function will also
+   increment TO by the number of bytes the new instruction(s) take(s).
+
+   PC relative instructions need to be handled specifically:
+
+   - B/BL
+   - B.COND
+   - CBZ/CBNZ
+   - TBZ/TBNZ
+   - ADR/ADRP
+   - LDR/LDRSW (literal)  */
+
+static void
+aarch64_relocate_instruction (CORE_ADDR *to, CORE_ADDR oldloc)
+{
+  uint32_t buf[32];
+  uint32_t *p = buf;
+  uint32_t insn;
+
+  int is_bl;
+  int is64;
+  int is_sw;
+  int is_cbnz;
+  int is_tbnz;
+  int is_adrp;
+  unsigned rn;
+  unsigned rt;
+  unsigned rd;
+  unsigned cond;
+  unsigned bit;
+  int32_t offset;
+
+  target_read_uint32 (oldloc, &insn);
+
+  if (aarch64_decode_b (oldloc, insn, &is_bl, &offset))
+    {
+      offset = (oldloc - *to + offset);
+
+      if (can_encode_int32 (offset, 28))
+	p += emit_b (p, is_bl, offset);
+      else
+	return;
+    }
+  else if (aarch64_decode_bcond (oldloc, insn, &cond, &offset))
+    {
+      offset = (oldloc - *to + offset);
+
+      if (can_encode_int32 (offset, 21))
+	p += emit_bcond (p, cond, offset);
+      else if (can_encode_int32 (offset, 28))
+	{
+	  /* The offset is out of range for a conditional branch
+	     instruction but not for an unconditional branch.  We can use
+	     the following instructions instead:
+
+	       B.COND TAKEN    ; If cond is true, then jump to TAKEN.
+	       B NOT_TAKEN     ; Else jump over TAKEN and continue.
+	     TAKEN:
+	       B #(offset - 8)
+	     NOT_TAKEN:
+
+	     */
+
+	  p += emit_bcond (p, cond, 8);
+	  p += emit_b (p, 0, 8);
+	  p += emit_b (p, 0, offset - 8);
+	}
+      else
+	return;
+    }
+  else if (aarch64_decode_cb (oldloc, insn, &is64, &is_cbnz, &rn, &offset))
+    {
+      offset = (oldloc - *to + offset);
+
+      if (can_encode_int32 (offset, 21))
+	p += emit_cb (p, is_cbnz, aarch64_register (rn, is64), offset);
+      else if (can_encode_int32 (offset, 28))
+	{
+	  /* The offset is out of range for a compare and branch
+	     instruction but not for an unconditional branch.  We can use
+	     the following instructions instead:
+
+	       CBZ xn, TAKEN   ; If xn == 0, then jump to TAKEN.
+	       B NOT_TAKEN     ; Else jump over TAKEN and continue.
+	     TAKEN:
+	       B #(offset - 8)
+	     NOT_TAKEN:
+
+	     */
+	  p += emit_cb (p, is_cbnz, aarch64_register (rn, is64), 8);
+	  p += emit_b (p, 0, 8);
+	  p += emit_b (p, 0, offset - 8);
+	}
+      else
+	return;
+    }
+  else if (aarch64_decode_tb (oldloc, insn, &is_tbnz, &bit, &rt, &offset))
+    {
+      offset = (oldloc - *to + offset);
+
+      if (can_encode_int32 (offset, 16))
+	p += emit_tb (p, is_tbnz, bit, aarch64_register (rt, 1), offset);
+      else if (can_encode_int32 (offset, 28))
+	{
+	  /* The offset is out of range for a test bit and branch
+	     instruction but not for an unconditional branch.  We can use
+	     the following instructions instead:
+
+	       TBZ xn, #bit, TAKEN ; If xn[bit] == 0, then jump to TAKEN.
+	       B NOT_TAKEN         ; Else jump over TAKEN and continue.
+	     TAKEN:
+	       B #(offset - 8)
+	     NOT_TAKEN:
+
+	     */
+	  p += emit_tb (p, is_tbnz, bit, aarch64_register (rt, 1), 8);
+	  p += emit_b (p, 0, 8);
+	  p += emit_b (p, 0, offset - 8);
+	}
+      else
+	return;
+    }
+  else if (aarch64_decode_adr (oldloc, insn, &is_adrp, &rd, &offset))
+    {
+      /* We know exactly the address the ADR{P,} instruction will compute.
+	 We can just write it to the destination register.  */
+      CORE_ADDR address = oldloc + offset;
+
+      if (is_adrp)
+	{
+	  /* Clear the lower 12 bits of the offset to get the 4K page.  */
+	  p += emit_mov_addr (p, aarch64_register (rd, 1),
+			      address & ~0xfff);
+	}
+      else
+	p += emit_mov_addr (p, aarch64_register (rd, 1), address);
+    }
+  else if (aarch64_decode_ldr_literal (oldloc, insn, &is_sw, &is64, &rt,
+				       &offset))
+    {
+      /* We know exactly what address to load from, and what register we
+	 can use:
+
+	   MOV xd, #(oldloc + offset)
+	   MOVK xd, #((oldloc + offset) >> 16), lsl #16
+	   ...
+
+	   LDR xd, [xd] ; or LDRSW xd, [xd]
+
+	 */
+      CORE_ADDR address = oldloc + offset;
+
+      p += emit_mov_addr (p, aarch64_register (rt, 1), address);
+
+      if (is_sw)
+	p += emit_ldrsw (p, aarch64_register (rt, 1),
+			 aarch64_register (rt, 1),
+			 offset_memory_operand (0));
+      else
+	p += emit_ldr (p, aarch64_register (rt, is64),
+		       aarch64_register (rt, 1),
+		       offset_memory_operand (0));
+    }
+  else
+    {
+      /* The instruction is not PC relative.  Just re-emit it at the new
+	 location.  */
+      p += emit_insn (p, insn);
+    }
+
+  append_insns (to, p - buf, buf);
+}
+
+/* Implementation of linux_target_ops method
+   "install_fast_tracepoint_jump_pad".  */
+
+static int
+aarch64_install_fast_tracepoint_jump_pad (CORE_ADDR tpoint,
+					  CORE_ADDR tpaddr,
+					  CORE_ADDR collector,
+					  CORE_ADDR lockaddr,
+					  ULONGEST orig_size,
+					  CORE_ADDR *jump_entry,
+					  CORE_ADDR *trampoline,
+					  ULONGEST *trampoline_size,
+					  unsigned char *jjump_pad_insn,
+					  ULONGEST *jjump_pad_insn_size,
+					  CORE_ADDR *adjusted_insn_addr,
+					  CORE_ADDR *adjusted_insn_addr_end,
+					  char *err)
+{
+  uint32_t buf[256];
+  uint32_t *p = buf;
+  int32_t offset;
+  int i;
+  CORE_ADDR buildaddr = *jump_entry;
+
+  /* We need to save the current state on the stack both to restore it
+     later and to collect register values when the tracepoint is hit.
+
+     The saved registers are pushed in a layout that needs to be in sync
+     with aarch64_ft_collect_regmap (see linux-aarch64-ipa.c).  Later on
+     the supply_fast_tracepoint_registers function will fill in the
+     register cache from a pointer to saved registers on the stack we build
+     here.
+
+     For simplicity, we set the size of each cell on the stack to 16 bytes.
+     This way one cell can hold any register type, from system registers
+     to the 128 bit SIMD&FP registers.  Furthermore, the stack pointer
+     has to be 16 bytes aligned anyway.
+
+     Note that the CPSR register does not exist on AArch64.  Instead we
+     can access system bits describing the process state with the
+     MRS/MSR instructions, namely the condition flags.  We save them as
+     if they are part of a CPSR register because that's how GDB
+     interprets these system bits.  At the moment, only the condition
+     flags are saved in CPSR (NZCV).
+
+     Stack layout, each cell is 16 bytes (descending):
+
+     High *-------- SIMD&FP registers from 31 down to 0. --------*
+          | q31                                                  |
+          .                                                      .
+          .                                                      .  32 cells
+          .                                                      .
+          | q0                                                   |
+          *---- General purpose registers from 30 down to 0. ----*
+          | x30                                                  |
+          .                                                      .
+          .                                                      .  31 cells
+          .                                                      .
+          | x0                                                   |
+          *------------- Special purpose registers. -------------*
+          | SP                                                   |
+          | PC                                                   |
+          | CPSR (NZCV)                                          |  5 cells
+          | FPSR                                                 |
+          | FPCR                                                 | <- SP + 16
+          *------------- collecting_t object --------------------*
+          | TPIDR_EL0                  | struct tracepoint *     |
+     Low  *------------------------------------------------------*
+
+     After this stack is set up, we issue a call to the collector, passing
+     it the saved registers at (SP + 16).  */
+
+  /* Push SIMD&FP registers on the stack:
+
+       SUB sp, sp, #(32 * 16)
+
+       STP q30, q31, [sp, #(30 * 16)]
+       ...
+       STP q0, q1, [sp]
+
+     */
+  p += emit_sub (p, sp, sp, immediate_operand (32 * 16));
+  for (i = 30; i >= 0; i -= 2)
+    p += emit_stp_q_offset (p, i, i + 1, sp, i * 16);
+
+  /* Push general purpose registers on the stack.  Note that we do not need
+     to push x31 as it represents the xzr register and not the stack
+     pointer in a STR instruction.
+
+       SUB sp, sp, #(31 * 16)
+
+       STR x30, [sp, #(30 * 16)]
+       ...
+       STR x0, [sp]
+
+     */
+  p += emit_sub (p, sp, sp, immediate_operand (31 * 16));
+  for (i = 30; i >= 0; i -= 1)
+    p += emit_str (p, aarch64_register (i, 1), sp,
+		   offset_memory_operand (i * 16));
+
+  /* Make space for 5 more cells.
+
+       SUB sp, sp, #(5 * 16)
+
+     */
+  p += emit_sub (p, sp, sp, immediate_operand (5 * 16));
+
+  /* Save SP:
+
+       ADD x4, sp, #((32 + 31 + 5) * 16)
+       STR x4, [sp, #(4 * 16)]
+
+     */
+  p += emit_add (p, x4, sp, immediate_operand ((32 + 31 + 5) * 16));
+  p += emit_str (p, x4, sp, offset_memory_operand (4 * 16));
+
+  /* Save PC (tracepoint address):
+
+       MOV x3, #(tpaddr)
+       ...
+
+       STR x3, [sp, #(3 * 16)]
+
+     */
+
+  p += emit_mov_addr (p, x3, tpaddr);
+  p += emit_str (p, x3, sp, offset_memory_operand (3 * 16));
+
+  /* Save CPSR (NZCV), FPSR and FPCR:
+
+       MRS x2, nzcv
+       MRS x1, fpsr
+       MRS x0, fpcr
+
+       STR x2, [sp, #(2 * 16)]
+       STR x1, [sp, #(1 * 16)]
+       STR x0, [sp, #(0 * 16)]
+
+     */
+  p += emit_mrs (p, x2, NZCV);
+  p += emit_mrs (p, x1, FPSR);
+  p += emit_mrs (p, x0, FPCR);
+  p += emit_str (p, x2, sp, offset_memory_operand (2 * 16));
+  p += emit_str (p, x1, sp, offset_memory_operand (1 * 16));
+  p += emit_str (p, x0, sp, offset_memory_operand (0 * 16));
+
+  /* Push the collecting_t object.  It consists of the address of the
+     tracepoint and an ID for the current thread.  We get the latter by
+     reading the tpidr_el0 system register.  It corresponds to the
+     NT_ARM_TLS register accessible with ptrace.
+
+       MOV x0, #(tpoint)
+       ...
+
+       MRS x1, tpidr_el0
+
+       STP x0, x1, [sp, #-16]!
+
+     */
+
+  p += emit_mov_addr (p, x0, tpoint);
+  p += emit_mrs (p, x1, TPIDR_EL0);
+  p += emit_stp (p, x0, x1, sp, preindex_memory_operand (-16));
+
+  /* Spin-lock:
+
+     The shared memory for the lock is at lockaddr.  It will hold zero
+     if no-one is holding the lock, otherwise it contains the address of
+     the collecting_t object on the stack of the thread which acquired it.
+
+     At this stage, the stack pointer points to this thread's collecting_t
+     object.
+
+     We use the following registers:
+     - x0: Address of the lock.
+     - x1: Pointer to collecting_t object.
+     - x2: Scratch register.
+
+       MOV x0, #(lockaddr)
+       ...
+       MOV x1, sp
+
+       ; Trigger an event local to this core.  So the following WFE
+       ; instruction is ignored.
+       SEVL
+     again:
+       ; Wait for an event.  The event is triggered by either the SEVL
+       ; or STLR instructions (store release).
+       WFE
+
+       ; Atomically read at lockaddr.  This marks the memory location as
+       ; exclusive.  This instruction also has memory constraints which
+       ; make sure all previous data reads and writes are done before
+       ; executing it.
+       LDAXR x2, [x0]
+
+       ; Try again if another thread holds the lock.
+       CBNZ x2, again
+
+       ; We can lock it!  Write the address of the collecting_t object.
+       ; This instruction will fail if the memory location is not marked
+       ; as exclusive anymore.  If it succeeds, it will remove the
+       ; exclusive mark on the memory location.  This way, if another
+       ; thread executes this instruction before us, we will fail and try
+       ; all over again.
+       STXR w2, x1, [x0]
+       CBNZ w2, again
+
+     */
+
+  p += emit_mov_addr (p, x0, lockaddr);
+  p += emit_mov (p, x1, register_operand (sp));
+
+  p += emit_sevl (p);
+  p += emit_wfe (p);
+  p += emit_ldaxr (p, x2, x0);
+  p += emit_cb (p, 1, x2, -2 * 4);
+  p += emit_stxr (p, w2, x1, x0);
+  p += emit_cb (p, 1, w2, -4 * 4);
+
+  /* Call collector (struct tracepoint *, unsigned char *):
+
+       MOV x0, #(tpoint)
+       ...
+
+       ; Saved registers start after the collecting_t object.
+       ADD x1, sp, #16
+
+       ; We use an intra-procedure-call scratch register.
+       MOV ip0, #(collector)
+       ...
+
+       ; And call back to C!
+       BLR ip0
+
+     */
+
+  p += emit_mov_addr (p, x0, tpoint);
+  p += emit_add (p, x1, sp, immediate_operand (16));
+
+  p += emit_mov_addr (p, ip0, collector);
+  p += emit_blr (p, ip0);
+
+  /* Release the lock.
+
+       MOV x0, #(lockaddr)
+       ...
+
+       ; This instruction is a normal store with memory ordering
+       ; constraints.  Thanks to this we do not have to put a data
+       ; barrier instruction to make sure all data reads and writes are
+       ; done before this instruction is executed.  Furthermore, this
+       ; instruction will trigger an event, letting other threads know
+       ; they can grab the lock.
+       STLR xzr, [x0]
+
+     */
+  p += emit_mov_addr (p, x0, lockaddr);
+  p += emit_stlr (p, xzr, x0);
+
+  /* Free collecting_t object:
+
+       ADD sp, sp, #16
+
+     */
+  p += emit_add (p, sp, sp, immediate_operand (16));
+
+  /* Restore CPSR (NZCV), FPSR and FPCR.  And free all special purpose
+     registers from the stack.
+
+       LDR x2, [sp, #(2 * 16)]
+       LDR x1, [sp, #(1 * 16)]
+       LDR x0, [sp, #(0 * 16)]
+
+       MSR NZCV, x2
+       MSR FPSR, x1
+       MSR FPCR, x0
+
+       ADD sp, sp, #(5 * 16)
+
+     */
+  p += emit_ldr (p, x2, sp, offset_memory_operand (2 * 16));
+  p += emit_ldr (p, x1, sp, offset_memory_operand (1 * 16));
+  p += emit_ldr (p, x0, sp, offset_memory_operand (0 * 16));
+  p += emit_msr (p, NZCV, x2);
+  p += emit_msr (p, FPSR, x1);
+  p += emit_msr (p, FPCR, x0);
+
+  p += emit_add (p, sp, sp, immediate_operand (5 * 16));
+
+  /* Pop general purpose registers:
+
+       LDR x0, [sp]
+       ...
+       LDR x30, [sp, #(30 * 16)]
+
+       ADD sp, sp, #(31 * 16)
+
+     */
+  for (i = 0; i <= 30; i += 1)
+    p += emit_ldr (p, aarch64_register (i, 1), sp,
+		   offset_memory_operand (i * 16));
+  p += emit_add (p, sp, sp, immediate_operand (31 * 16));
+
+  /* Pop SIMD&FP registers:
+
+       LDP q0, q1, [sp]
+       ...
+       LDP q30, q31, [sp, #(30 * 16)]
+
+       ADD sp, sp, #(32 * 16)
+
+     */
+  for (i = 0; i <= 30; i += 2)
+    p += emit_ldp_q_offset (p, i, i + 1, sp, i * 16);
+  p += emit_add (p, sp, sp, immediate_operand (32 * 16));
+
+  /* Write the code into the inferior memory.  */
+  append_insns (&buildaddr, p - buf, buf);
+
+  /* Now emit the relocated instruction.  */
+  *adjusted_insn_addr = buildaddr;
+  aarch64_relocate_instruction (&buildaddr, tpaddr);
+  *adjusted_insn_addr_end = buildaddr;
+
+  /* We may not have been able to relocate the instruction.  */
+  if (*adjusted_insn_addr == *adjusted_insn_addr_end)
+    {
+      sprintf (err,
+	       "E.Could not relocate instruction from %s to %s.",
+	       core_addr_to_string_nz (tpaddr),
+	       core_addr_to_string_nz (buildaddr));
+      return 1;
+    }
+
+  /* Go back to the start of the buffer.  */
+  p = buf;
+
+  /* Emit a branch back from the jump pad.  */
+  offset = (tpaddr + orig_size - buildaddr);
+  if (!can_encode_int32 (offset, 28))
+    {
+      sprintf (err,
+	       "E.Jump back from jump pad too far from tracepoint "
+	       "(offset 0x%" PRIx32 " cannot be encoded in 28 bits).",
+	       offset);
+      return 1;
+    }
+
+  p += emit_b (p, 0, offset);
+  append_insns (&buildaddr, p - buf, buf);
+
+  /* Give the caller a branch instruction into the jump pad.  */
+  offset = (*jump_entry - tpaddr);
+  if (!can_encode_int32 (offset, 28))
+    {
+      sprintf (err,
+	       "E.Jump pad too far from tracepoint "
+	       "(offset 0x%" PRIx32 " cannot be encoded in 28 bits).",
+	       offset);
+      return 1;
+    }
+
+  emit_b ((uint32_t *) jjump_pad_insn, 0, offset);
+  *jjump_pad_insn_size = 4;
+
+  /* Return the end address of our pad.  */
+  *jump_entry = buildaddr;
+
+  return 0;
+}
+
+/* Implementation of linux_target_ops method
+   "get_min_fast_tracepoint_insn_len".  */
+
+static int
+aarch64_get_min_fast_tracepoint_insn_len (void)
+{
+  return 4;
+}
+
 /* Implementation of linux_target_ops method "supports_range_stepping".  */
 
 static int
@@ -595,10 +2056,10 @@ struct linux_target_ops the_low_target =
   aarch64_linux_prepare_to_resume,
   NULL, /* process_qsupported */
   aarch64_supports_tracepoints,
-  NULL, /* get_thread_area */
-  NULL, /* install_fast_tracepoint_jump_pad */
+  aarch64_get_thread_area,
+  aarch64_install_fast_tracepoint_jump_pad,
   NULL, /* emit_ops */
-  NULL, /* get_min_fast_tracepoint_insn_len */
+  aarch64_get_min_fast_tracepoint_insn_len,
   aarch64_supports_range_stepping,
 };
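
Review note, not part of the patch: the acquire/release protocol implemented
by the emitted SEVL/WFE/LDAXR/STXR/STLR sequence above can also be expressed
at the C level.  The sketch below is a functional analogue using GCC's
__atomic builtins, which on AArch64 (without LSE atomics) are typically
lowered to an LDAXR/STXR loop and a STLR much like the jump pad's.  The names
collecting_lock and collecting_unlock are invented for this illustration;
"lock" plays the role of lockaddr and "self" the address of this thread's
collecting_t object.

/* Illustrative C analogue (not part of the patch) of the jump pad's
   spin-lock around the collector call.  */
#include <stdint.h>

static void
collecting_lock (volatile uintptr_t *lock, uintptr_t self)
{
  uintptr_t expected;

  do
    {
      /* The lock is free only when the lock word holds zero.  */
      expected = 0;
    }
  while (!__atomic_compare_exchange_n (lock, &expected, self,
				       /* weak */ 1,
				       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
}

static void
collecting_unlock (volatile uintptr_t *lock)
{
  /* Equivalent of "STLR xzr, [x0]": a store-release of zero, which also
     wakes up any WFE spinners waiting on the lock.  */
  __atomic_store_n (lock, 0, __ATOMIC_RELEASE);
}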