diff --git a/bfd/ChangeLog b/bfd/ChangeLog
index d8a1f7097a..1ef747ea52 100644
--- a/bfd/ChangeLog
+++ b/bfd/ChangeLog
@@ -1,3 +1,71 @@
+2015-10-27  Laurent Alfonsi
+	    Christophe Monat
+
+	* bfd-in2.h: Regenerate.
+	* bfd-in.h (bfd_arm_stm32l4xx_fix): New enum. Specify how
+	STM32L4XX instruction scanning should be done.
+	(bfd_elf32_arm_set_stm32l4xx_fix)
+	(bfd_elf32_arm_stm32l4xx_erratum_scan)
+	(bfd_elf32_arm_stm32l4xx_fix_veneer_locations): Add prototypes.
+	(bfd_elf32_arm_set_target_relocs): Add stm32l4xx fix type argument
+	to prototype.
+	* elf32-arm.c (STM32L4XX_ERRATUM_VENEER_SECTION_NAME)
+	(STM32L4XX_ERRATUM_VENEER_ENTRY_NAME): Define macros.
+	(elf32_stm32l4xx_erratum_type): New enum.
+	(elf32_stm32l4xx_erratum_list): New struct. List of veneers or
+	jumps to veneers.
+	(_arm_elf_section_data): Add stm32l4xx_erratumcount,
+	stm32l4xx_erratumlist.
+	(elf32_arm_link_hash_table): Add stm32l4xx_erratum_glue_size,
+	stm32l4xx_fix and num_stm32l4xx_fixes fields.
+	(ctz): New function.
+	(popcount): New function.
+	(elf32_arm_link_hash_table_create): Initialize stm32l4xx_fix.
+	(put_thumb2_insn): New function.
+	(STM32L4XX_ERRATUM_LDM_VENEER_SIZE): Define. Size of a veneer for
+	LDM instructions.
+	(STM32L4XX_ERRATUM_VLDM_VENEER_SIZE): Define. Size of a veneer for
+	VLDM instructions.
+	(bfd_elf32_arm_allocate_interworking_sections): Initialise erratum
+	glue section.
+	(record_stm32l4xx_erratum_veneer): New function. Create a single
+	veneer, and its associated symbols.
+	(bfd_elf32_arm_add_glue_sections_to_bfd): Add STM32L4XX erratum glue.
+	(bfd_elf32_arm_set_stm32l4xx_fix): New function. Set the type of
+	erratum workaround required.
+	(bfd_elf32_arm_stm32l4xx_fix_veneer_locations): New function. Find
+	out where veneers and branches to veneers have been placed in
+	virtual memory after layout.
+	(is_thumb2_ldmia): New function.
+	(is_thumb2_ldmdb): Likewise.
+	(is_thumb2_vldm): Likewise.
+	(stm32l4xx_need_create_replacing_stub): New function. Decide if a
+	veneer must be emitted.
+	(bfd_elf32_arm_stm32l4xx_erratum_scan): Scan the sections of an
+	input BFD for potential erratum-triggering insns. Record results.
+	(bfd_elf32_arm_set_target_relocs): Set stm32l4xx_fix field in
+	global hash table.
+	(elf32_arm_size_dynamic_sections): Collect glue information.
+	(create_instruction_branch_absolute): New function.
+	(create_instruction_ldmia): Likewise.
+	(create_instruction_ldmdb): Likewise.
+	(create_instruction_mov): Likewise.
+	(create_instruction_sub): Likewise.
+	(create_instruction_vldmia): Likewise.
+	(create_instruction_vldmdb): Likewise.
+	(create_instruction_udf_w): Likewise.
+	(create_instruction_udf): Likewise.
+	(push_thumb2_insn32): Likewise.
+	(push_thumb2_insn16): Likewise.
+	(stm32l4xx_fill_stub_udf): Likewise.
+	(stm32l4xx_create_replacing_stub_ldmia): New function. Expands the
+	replacing stub for ldmia instructions.
+	(stm32l4xx_create_replacing_stub_ldmdb): Likewise for ldmdb.
+	(stm32l4xx_create_replacing_stub_vldm): Likewise for vldm.
+	(stm32l4xx_create_replacing_stub): New function. Dispatches the
+	stub emission to the appropriate functions.
+	(elf32_arm_write_section): Output veneers, and branches to veneers.
+ 2015-10-27 Sangamesh Mallayya sangamesh.swamy@in.ibm.com> * configure.ac (powerpc64-*-aix[5-9].*): Match powerpc64 running diff --git a/bfd/bfd-in.h b/bfd/bfd-in.h index ae99d1e356..9e40df5713 100644 --- a/bfd/bfd-in.h +++ b/bfd/bfd-in.h @@ -849,6 +849,23 @@ extern bfd_boolean bfd_elf32_arm_vfp11_erratum_scan extern void bfd_elf32_arm_vfp11_fix_veneer_locations (bfd *, struct bfd_link_info *); +/* ARM STM STM32L4XX erratum workaround support. */ +typedef enum +{ + BFD_ARM_STM32L4XX_FIX_NONE, + BFD_ARM_STM32L4XX_FIX_DEFAULT, + BFD_ARM_STM32L4XX_FIX_ALL +} bfd_arm_stm32l4xx_fix; + +extern void bfd_elf32_arm_set_stm32l4xx_fix + (bfd *, struct bfd_link_info *); + +extern bfd_boolean bfd_elf32_arm_stm32l4xx_erratum_scan + (bfd *, struct bfd_link_info *); + +extern void bfd_elf32_arm_stm32l4xx_fix_veneer_locations + (bfd *, struct bfd_link_info *); + /* ARM Interworking support. Called from linker. */ extern bfd_boolean bfd_arm_allocate_interworking_sections (struct bfd_link_info *); @@ -878,7 +895,7 @@ extern bfd_boolean bfd_elf32_arm_process_before_allocation void bfd_elf32_arm_set_target_relocs (bfd *, struct bfd_link_info *, int, char *, int, int, bfd_arm_vfp11_fix, - int, int, int, int, int); + bfd_arm_stm32l4xx_fix, int, int, int, int, int); extern bfd_boolean bfd_elf32_arm_get_bfd_for_interworking (bfd *, struct bfd_link_info *); diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h index 9345e8846c..f381f06f3f 100644 --- a/bfd/bfd-in2.h +++ b/bfd/bfd-in2.h @@ -856,6 +856,23 @@ extern bfd_boolean bfd_elf32_arm_vfp11_erratum_scan extern void bfd_elf32_arm_vfp11_fix_veneer_locations (bfd *, struct bfd_link_info *); +/* ARM STM STM32L4XX erratum workaround support. */ +typedef enum +{ + BFD_ARM_STM32L4XX_FIX_NONE, + BFD_ARM_STM32L4XX_FIX_DEFAULT, + BFD_ARM_STM32L4XX_FIX_ALL +} bfd_arm_stm32l4xx_fix; + +extern void bfd_elf32_arm_set_stm32l4xx_fix + (bfd *, struct bfd_link_info *); + +extern bfd_boolean bfd_elf32_arm_stm32l4xx_erratum_scan + (bfd *, struct bfd_link_info *); + +extern void bfd_elf32_arm_stm32l4xx_fix_veneer_locations + (bfd *, struct bfd_link_info *); + /* ARM Interworking support. Called from linker. */ extern bfd_boolean bfd_arm_allocate_interworking_sections (struct bfd_link_info *); @@ -885,7 +902,7 @@ extern bfd_boolean bfd_elf32_arm_process_before_allocation void bfd_elf32_arm_set_target_relocs (bfd *, struct bfd_link_info *, int, char *, int, int, bfd_arm_vfp11_fix, - int, int, int, int, int); + bfd_arm_stm32l4xx_fix, int, int, int, int, int); extern bfd_boolean bfd_elf32_arm_get_bfd_for_interworking (bfd *, struct bfd_link_info *); diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c index aa01a59e9a..6f85b2b05a 100644 --- a/bfd/elf32-arm.c +++ b/bfd/elf32-arm.c @@ -2072,6 +2072,9 @@ typedef unsigned short int insn16; #define VFP11_ERRATUM_VENEER_SECTION_NAME ".vfp11_veneer" #define VFP11_ERRATUM_VENEER_ENTRY_NAME "__vfp11_veneer_%x" +#define STM32L4XX_ERRATUM_VENEER_SECTION_NAME ".text.stm32l4xx_veneer" +#define STM32L4XX_ERRATUM_VENEER_ENTRY_NAME "__stm32l4xx_veneer_%x" + #define ARM_BX_GLUE_SECTION_NAME ".v4_bx" #define ARM_BX_GLUE_ENTRY_NAME "__bx_r%d" @@ -2679,6 +2682,36 @@ typedef struct elf32_vfp11_erratum_list } elf32_vfp11_erratum_list; +/* Information about a STM32L4XX erratum veneer, or a branch to such a + veneer. 
*/ +typedef enum +{ + STM32L4XX_ERRATUM_BRANCH_TO_VENEER, + STM32L4XX_ERRATUM_VENEER +} +elf32_stm32l4xx_erratum_type; + +typedef struct elf32_stm32l4xx_erratum_list +{ + struct elf32_stm32l4xx_erratum_list *next; + bfd_vma vma; + union + { + struct + { + struct elf32_stm32l4xx_erratum_list *veneer; + unsigned int insn; + } b; + struct + { + struct elf32_stm32l4xx_erratum_list *branch; + unsigned int id; + } v; + } u; + elf32_stm32l4xx_erratum_type type; +} +elf32_stm32l4xx_erratum_list; + typedef enum { DELETE_EXIDX_ENTRY, @@ -2709,6 +2742,8 @@ typedef struct _arm_elf_section_data /* Information about CPU errata. */ unsigned int erratumcount; elf32_vfp11_erratum_list *erratumlist; + unsigned int stm32l4xx_erratumcount; + elf32_stm32l4xx_erratum_list *stm32l4xx_erratumlist; /* Information about unwind tables. */ union { @@ -2942,6 +2977,10 @@ struct elf32_arm_link_hash_table veneers. */ bfd_size_type vfp11_erratum_glue_size; + /* The size in bytes of the section containing glue for STM32L4XX erratum + veneers. */ + bfd_size_type stm32l4xx_erratum_glue_size; + /* A table of fix locations for Cortex-A8 Thumb-2 branch/TLB erratum. This holds Cortex-A8 erratum fix locations between elf32_arm_size_stubs() and elf32_arm_write_section(). */ @@ -2982,6 +3021,13 @@ struct elf32_arm_link_hash_table /* Global counter for the number of fixes we have emitted. */ int num_vfp11_fixes; + /* What sort of code sequences we should look for which may trigger the + STM32L4XX erratum. */ + bfd_arm_stm32l4xx_fix stm32l4xx_fix; + + /* Global counter for the number of fixes we have emitted. */ + int num_stm32l4xx_fixes; + /* Nonzero to force PIC branch veneers. */ int pic_veneer; @@ -3069,6 +3115,42 @@ struct elf32_arm_link_hash_table asection **input_list; }; +static inline int +ctz (unsigned int mask) +{ +#if GCC_VERSION >= 3004 + return __builtin_ctz (mask); +#else + unsigned int i; + + for (i = 0; i < 8 * sizeof (mask); i++) + { + if (mask & 0x1) + break; + mask = (mask >> 1); + } + return i; +#endif +} + +static inline int +popcount (unsigned int mask) +{ +#if GCC_VERSION >= 3004 + return __builtin_popcount (mask); +#else + unsigned int i, sum = 0; + + for (i = 0; i < 8 * sizeof (mask); i++) + { + if (mask & 0x1) + sum++; + mask = (mask >> 1); + } + return sum; +#endif +} + /* Create an entry in an ARM ELF linker hash table. */ static struct bfd_hash_entry * @@ -3559,6 +3641,7 @@ elf32_arm_link_hash_table_create (bfd *abfd) } ret->vfp11_fix = BFD_ARM_VFP11_FIX_NONE; + ret->stm32l4xx_fix = BFD_ARM_STM32L4XX_FIX_NONE; #ifdef FOUR_WORD_PLT ret->plt_header_size = 16; ret->plt_entry_size = 16; @@ -4088,6 +4171,26 @@ put_thumb_insn (struct elf32_arm_link_hash_table * htab, bfd_putb16 (val, ptr); } +/* Store a Thumb2 insn into an output section not processed by + elf32_arm_write_section. */ + +static void +put_thumb2_insn (struct elf32_arm_link_hash_table * htab, + bfd * output_bfd, bfd_vma val, void * ptr) +{ + /* T2 instructions are 16-bit streamed. */ + if (htab->byteswap_code != bfd_little_endian (output_bfd)) + { + bfd_putl16 ((val >> 16) & 0xffff, ptr); + bfd_putl16 ((val & 0xffff), ptr + 2); + } + else + { + bfd_putb16 ((val >> 16) & 0xffff, ptr); + bfd_putb16 ((val & 0xffff), ptr + 2); + } +} + /* If it's possible to change R_TYPE to a more efficient access model, return the new reloc type. 
*/ @@ -5718,6 +5821,8 @@ static const insn16 t2a2_noop_insn = 0x46c0; static const insn32 t2a3_b_insn = 0xea000000; #define VFP11_ERRATUM_VENEER_SIZE 8 +#define STM32L4XX_ERRATUM_LDM_VENEER_SIZE 16 +#define STM32L4XX_ERRATUM_VLDM_VENEER_SIZE 24 #define ARM_BX_VENEER_SIZE 12 static const insn32 armbx1_tst_insn = 0xe3100001; @@ -5774,6 +5879,10 @@ bfd_elf32_arm_allocate_interworking_sections (struct bfd_link_info * info) globals->vfp11_erratum_glue_size, VFP11_ERRATUM_VENEER_SECTION_NAME); + arm_allocate_glue_section_space (globals->bfd_of_glue_owner, + globals->stm32l4xx_erratum_glue_size, + STM32L4XX_ERRATUM_VENEER_SECTION_NAME); + arm_allocate_glue_section_space (globals->bfd_of_glue_owner, globals->bx_glue_size, ARM_BX_GLUE_SECTION_NAME); @@ -6065,6 +6174,125 @@ record_vfp11_erratum_veneer (struct bfd_link_info *link_info, return val; } +/* Record information about a STM32L4XX STM erratum veneer. Only THUMB-mode + veneers need to be handled because used only in Cortex-M. */ + +static bfd_vma +record_stm32l4xx_erratum_veneer (struct bfd_link_info *link_info, + elf32_stm32l4xx_erratum_list *branch, + bfd *branch_bfd, + asection *branch_sec, + unsigned int offset, + bfd_size_type veneer_size) +{ + asection *s; + struct elf32_arm_link_hash_table *hash_table; + char *tmp_name; + struct elf_link_hash_entry *myh; + struct bfd_link_hash_entry *bh; + bfd_vma val; + struct _arm_elf_section_data *sec_data; + elf32_stm32l4xx_erratum_list *newerr; + + hash_table = elf32_arm_hash_table (link_info); + BFD_ASSERT (hash_table != NULL); + BFD_ASSERT (hash_table->bfd_of_glue_owner != NULL); + + s = bfd_get_linker_section + (hash_table->bfd_of_glue_owner, STM32L4XX_ERRATUM_VENEER_SECTION_NAME); + + BFD_ASSERT (s != NULL); + + sec_data = elf32_arm_section_data (s); + + tmp_name = (char *) bfd_malloc ((bfd_size_type) strlen + (STM32L4XX_ERRATUM_VENEER_ENTRY_NAME) + 10); + + BFD_ASSERT (tmp_name); + + sprintf (tmp_name, STM32L4XX_ERRATUM_VENEER_ENTRY_NAME, + hash_table->num_stm32l4xx_fixes); + + myh = elf_link_hash_lookup + (&(hash_table)->root, tmp_name, FALSE, FALSE, FALSE); + + BFD_ASSERT (myh == NULL); + + bh = NULL; + val = hash_table->stm32l4xx_erratum_glue_size; + _bfd_generic_link_add_one_symbol (link_info, hash_table->bfd_of_glue_owner, + tmp_name, BSF_FUNCTION | BSF_LOCAL, s, val, + NULL, TRUE, FALSE, &bh); + + myh = (struct elf_link_hash_entry *) bh; + myh->type = ELF_ST_INFO (STB_LOCAL, STT_FUNC); + myh->forced_local = 1; + + /* Link veneer back to calling location. */ + sec_data->stm32l4xx_erratumcount += 1; + newerr = (elf32_stm32l4xx_erratum_list *) + bfd_zmalloc (sizeof (elf32_stm32l4xx_erratum_list)); + + newerr->type = STM32L4XX_ERRATUM_VENEER; + newerr->vma = -1; + newerr->u.v.branch = branch; + newerr->u.v.id = hash_table->num_stm32l4xx_fixes; + branch->u.b.veneer = newerr; + + newerr->next = sec_data->stm32l4xx_erratumlist; + sec_data->stm32l4xx_erratumlist = newerr; + + /* A symbol for the return from the veneer. 
*/ + sprintf (tmp_name, STM32L4XX_ERRATUM_VENEER_ENTRY_NAME "_r", + hash_table->num_stm32l4xx_fixes); + + myh = elf_link_hash_lookup + (&(hash_table)->root, tmp_name, FALSE, FALSE, FALSE); + + if (myh != NULL) + abort (); + + bh = NULL; + val = offset + 4; + _bfd_generic_link_add_one_symbol (link_info, branch_bfd, tmp_name, BSF_LOCAL, + branch_sec, val, NULL, TRUE, FALSE, &bh); + + myh = (struct elf_link_hash_entry *) bh; + myh->type = ELF_ST_INFO (STB_LOCAL, STT_FUNC); + myh->forced_local = 1; + + free (tmp_name); + + /* Generate a mapping symbol for the veneer section, and explicitly add an + entry for that symbol to the code/data map for the section. */ + if (hash_table->stm32l4xx_erratum_glue_size == 0) + { + bh = NULL; + /* Creates a THUMB symbol since there is no other choice. */ + _bfd_generic_link_add_one_symbol (link_info, + hash_table->bfd_of_glue_owner, "$t", + BSF_LOCAL, s, 0, NULL, + TRUE, FALSE, &bh); + + myh = (struct elf_link_hash_entry *) bh; + myh->type = ELF_ST_INFO (STB_LOCAL, STT_NOTYPE); + myh->forced_local = 1; + + /* The elf32_arm_init_maps function only cares about symbols from input + BFDs. We must make a note of this generated mapping symbol + ourselves so that code byteswapping works properly in + elf32_arm_write_section. */ + elf32_arm_section_map_add (s, 't', 0); + } + + s->size += veneer_size; + hash_table->stm32l4xx_erratum_glue_size += veneer_size; + hash_table->num_stm32l4xx_fixes++; + + /* The offset of the veneer. */ + return val; +} + #define ARM_GLUE_SECTION_FLAGS \ (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS | SEC_IN_MEMORY | SEC_CODE \ | SEC_READONLY | SEC_LINKER_CREATED) @@ -6110,15 +6338,26 @@ bfd_boolean bfd_elf32_arm_add_glue_sections_to_bfd (bfd *abfd, struct bfd_link_info *info) { + struct elf32_arm_link_hash_table *globals = elf32_arm_hash_table (info); + bfd_boolean dostm32l4xx = globals + && globals->stm32l4xx_fix != BFD_ARM_STM32L4XX_FIX_NONE; + bfd_boolean addglue; + /* If we are only performing a partial link do not bother adding the glue. */ if (bfd_link_relocatable (info)) return TRUE; - return arm_make_glue_section (abfd, ARM2THUMB_GLUE_SECTION_NAME) + addglue = arm_make_glue_section (abfd, ARM2THUMB_GLUE_SECTION_NAME) && arm_make_glue_section (abfd, THUMB2ARM_GLUE_SECTION_NAME) && arm_make_glue_section (abfd, VFP11_ERRATUM_VENEER_SECTION_NAME) && arm_make_glue_section (abfd, ARM_BX_GLUE_SECTION_NAME); + + if (!dostm32l4xx) + return addglue; + + return addglue + && arm_make_glue_section (abfd, STM32L4XX_ERRATUM_VENEER_SECTION_NAME); } /* Select a BFD to be used to hold the sections used by the glue code. @@ -6437,6 +6676,26 @@ bfd_elf32_arm_set_vfp11_fix (bfd *obfd, struct bfd_link_info *link_info) globals->vfp11_fix = BFD_ARM_VFP11_FIX_NONE; } +void +bfd_elf32_arm_set_stm32l4xx_fix (bfd *obfd, struct bfd_link_info *link_info) +{ + struct elf32_arm_link_hash_table *globals = elf32_arm_hash_table (link_info); + obj_attribute *out_attr = elf_known_obj_attributes_proc (obfd); + + if (globals == NULL) + return; + + /* We assume only Cortex-M4 may require the fix. */ + if (out_attr[Tag_CPU_arch].i != TAG_CPU_ARCH_V7E_M + || out_attr[Tag_CPU_arch_profile].i != 'M') + { + if (globals->stm32l4xx_fix != BFD_ARM_STM32L4XX_FIX_NONE) + /* Give a warning, but do as the user requests anyway. 
*/ + (*_bfd_error_handler) + (_("%B: warning: selected STM32L4XX erratum " + "workaround is not necessary for target architecture"), obfd); + } +} enum bfd_arm_vfp11_pipe { @@ -7009,6 +7268,349 @@ bfd_elf32_arm_vfp11_fix_veneer_locations (bfd *abfd, free (tmp_name); } +/* Find virtual-memory addresses for STM32L4XX erratum veneers and + return locations after sections have been laid out, using + specially-named symbols. */ + +void +bfd_elf32_arm_stm32l4xx_fix_veneer_locations (bfd *abfd, + struct bfd_link_info *link_info) +{ + asection *sec; + struct elf32_arm_link_hash_table *globals; + char *tmp_name; + + if (bfd_link_relocatable (link_info)) + return; + + /* Skip if this bfd does not correspond to an ELF image. */ + if (! is_arm_elf (abfd)) + return; + + globals = elf32_arm_hash_table (link_info); + if (globals == NULL) + return; + + tmp_name = (char *) bfd_malloc ((bfd_size_type) strlen + (STM32L4XX_ERRATUM_VENEER_ENTRY_NAME) + 10); + + for (sec = abfd->sections; sec != NULL; sec = sec->next) + { + struct _arm_elf_section_data *sec_data = elf32_arm_section_data (sec); + elf32_stm32l4xx_erratum_list *errnode = sec_data->stm32l4xx_erratumlist; + + for (; errnode != NULL; errnode = errnode->next) + { + struct elf_link_hash_entry *myh; + bfd_vma vma; + + switch (errnode->type) + { + case STM32L4XX_ERRATUM_BRANCH_TO_VENEER: + /* Find veneer symbol. */ + sprintf (tmp_name, STM32L4XX_ERRATUM_VENEER_ENTRY_NAME, + errnode->u.b.veneer->u.v.id); + + myh = elf_link_hash_lookup + (&(globals)->root, tmp_name, FALSE, FALSE, TRUE); + + if (myh == NULL) + (*_bfd_error_handler) (_("%B: unable to find STM32L4XX veneer " + "`%s'"), abfd, tmp_name); + + vma = myh->root.u.def.section->output_section->vma + + myh->root.u.def.section->output_offset + + myh->root.u.def.value; + + errnode->u.b.veneer->vma = vma; + break; + + case STM32L4XX_ERRATUM_VENEER: + /* Find return location. */ + sprintf (tmp_name, STM32L4XX_ERRATUM_VENEER_ENTRY_NAME "_r", + errnode->u.v.id); + + myh = elf_link_hash_lookup + (&(globals)->root, tmp_name, FALSE, FALSE, TRUE); + + if (myh == NULL) + (*_bfd_error_handler) (_("%B: unable to find STM32L4XX veneer " + "`%s'"), abfd, tmp_name); + + vma = myh->root.u.def.section->output_section->vma + + myh->root.u.def.section->output_offset + + myh->root.u.def.value; + + errnode->u.v.branch->vma = vma; + break; + + default: + abort (); + } + } + } + + free (tmp_name); +} + +static inline bfd_boolean +is_thumb2_ldmia (const insn32 insn) +{ + /* Encoding T2: LDM.W {!}, + 1110 - 1000 - 10W1 - rrrr - PM (0) l - llll - llll - llll. */ + return (insn & 0xffd02000) == 0xe8900000; +} + +static inline bfd_boolean +is_thumb2_ldmdb (const insn32 insn) +{ + /* Encoding T1: LDMDB {!}, + 1110 - 1001 - 00W1 - rrrr - PM (0) l - llll - llll - llll. */ + return (insn & 0xffd02000) == 0xe9100000; +} + +static inline bfd_boolean +is_thumb2_vldm (const insn32 insn) +{ + /* A6.5 Extension register load or store instruction + A7.7.229 + We look only for the 32-bit registers case since the DP (64-bit + registers) are not supported for STM32L4XX + Encoding T2 VLDM{mode} {!}, + is consecutive 32-bit registers + 1110 - 110P - UDW1 - rrrr - vvvv - 1010 - iiii - iiii + if P==0 && U==1 && W==1 && Rn=1101 VPOP + if PUW=010 || PUW=011 || PUW=101 VLDM. */ + return + ((insn & 0xfe100f00) == 0xec100a00) + && /* (IA without !). */ + (((((insn << 7) >> 28) & 0xd) == 0x4) + /* (IA with !), includes VPOP (when reg number is SP). */ + || ((((insn << 7) >> 28) & 0xd) == 0x5) + /* (DB with !). 
*/ + || ((((insn << 7) >> 28) & 0xd) == 0x9)); +} + +/* STM STM32L4XX erratum : This function assumes that it receives an LDM or + VLDM opcode and: + - computes the number and the mode of memory accesses + - decides if the replacement should be done: + . replaces only if > 8-word accesses + . or (testing purposes only) replaces all accesses. */ + +static bfd_boolean +stm32l4xx_need_create_replacing_stub (const insn32 insn, + bfd_arm_stm32l4xx_fix stm32l4xx_fix) +{ + int nb_regs = 0; + + /* The field encoding the register list is the same for both LDMIA + and LDMDB encodings. */ + if (is_thumb2_ldmia (insn) || is_thumb2_ldmdb (insn)) + nb_regs = popcount (insn & 0x0000ffff); + else if (is_thumb2_vldm (insn)) + nb_regs = (insn & 0xff); + + /* DEFAULT mode accounts for the real bug condition situation, + ALL mode inserts stubs for each LDM/VLDM instruction (testing). */ + return + (stm32l4xx_fix == BFD_ARM_STM32L4XX_FIX_DEFAULT) ? nb_regs > 8 : + (stm32l4xx_fix == BFD_ARM_STM32L4XX_FIX_ALL) ? TRUE : FALSE; +} + +/* Look for potentially-troublesome code sequences which might trigger + the STM STM32L4XX erratum. */ + +bfd_boolean +bfd_elf32_arm_stm32l4xx_erratum_scan (bfd *abfd, + struct bfd_link_info *link_info) +{ + asection *sec; + bfd_byte *contents = NULL; + struct elf32_arm_link_hash_table *globals = elf32_arm_hash_table (link_info); + + if (globals == NULL) + return FALSE; + + /* If we are only performing a partial link do not bother + to construct any glue. */ + if (bfd_link_relocatable (link_info)) + return TRUE; + + /* Skip if this bfd does not correspond to an ELF image. */ + if (! is_arm_elf (abfd)) + return TRUE; + + if (globals->stm32l4xx_fix == BFD_ARM_STM32L4XX_FIX_NONE) + return TRUE; + + /* Skip this BFD if it corresponds to an executable or dynamic object. */ + if ((abfd->flags & (EXEC_P | DYNAMIC)) != 0) + return TRUE; + + for (sec = abfd->sections; sec != NULL; sec = sec->next) + { + unsigned int i, span; + struct _arm_elf_section_data *sec_data; + + /* If we don't have executable progbits, we're not interested in this + section. Also skip if section is to be excluded. */ + if (elf_section_type (sec) != SHT_PROGBITS + || (elf_section_flags (sec) & SHF_EXECINSTR) == 0 + || (sec->flags & SEC_EXCLUDE) != 0 + || sec->sec_info_type == SEC_INFO_TYPE_JUST_SYMS + || sec->output_section == bfd_abs_section_ptr + || strcmp (sec->name, STM32L4XX_ERRATUM_VENEER_SECTION_NAME) == 0) + continue; + + sec_data = elf32_arm_section_data (sec); + + if (sec_data->mapcount == 0) + continue; + + if (elf_section_data (sec)->this_hdr.contents != NULL) + contents = elf_section_data (sec)->this_hdr.contents; + else if (! bfd_malloc_and_get_section (abfd, sec, &contents)) + goto error_return; + + qsort (sec_data->map, sec_data->mapcount, sizeof (elf32_arm_section_map), + elf32_arm_compare_mapping); + + for (span = 0; span < sec_data->mapcount; span++) + { + unsigned int span_start = sec_data->map[span].vma; + unsigned int span_end = (span == sec_data->mapcount - 1) + ? sec->size : sec_data->map[span + 1].vma; + char span_type = sec_data->map[span].type; + int itblock_current_pos = 0; + + /* Only Thumb2 mode need be supported with this CM4 specific + code, we should not encounter any arm mode eg span_type + != 'a'. 
*/ + if (span_type != 't') + continue; + + for (i = span_start; i < span_end;) + { + unsigned int insn = bfd_get_16 (abfd, &contents[i]); + bfd_boolean insn_32bit = FALSE; + bfd_boolean is_ldm = FALSE; + bfd_boolean is_vldm = FALSE; + bfd_boolean is_not_last_in_it_block = FALSE; + + /* The first 16-bits of all 32-bit thumb2 instructions start + with opcode[15..13]=0b111 and the encoded op1 can be anything + except opcode[12..11]!=0b00. + See 32-bit Thumb instruction encoding. */ + if ((insn & 0xe000) == 0xe000 && (insn & 0x1800) != 0x0000) + insn_32bit = TRUE; + + /* Compute the predicate that tells if the instruction + is concerned by the IT block + - Creates an error if there is a ldm that is not + last in the IT block thus cannot be replaced + - Otherwise we can create a branch at the end of the + IT block, it will be controlled naturally by IT + with the proper pseudo-predicate + - So the only interesting predicate is the one that + tells that we are not on the last item of an IT + block. */ + if (itblock_current_pos != 0) + is_not_last_in_it_block = !!--itblock_current_pos; + + if (insn_32bit) + { + /* Load the rest of the insn (in manual-friendly order). */ + insn = (insn << 16) | bfd_get_16 (abfd, &contents[i + 2]); + is_ldm = is_thumb2_ldmia (insn) || is_thumb2_ldmdb (insn); + is_vldm = is_thumb2_vldm (insn); + + /* Veneers are created for (v)ldm depending on + option flags and memory accesses conditions; but + if the instruction is not the last instruction of + an IT block, we cannot create a jump there, so we + bail out. */ + if ((is_ldm || is_vldm) && + stm32l4xx_need_create_replacing_stub + (insn, globals->stm32l4xx_fix)) + { + if (is_not_last_in_it_block) + { + (*_bfd_error_handler) + /* Note - overlong line used here to allow for translation. */ + (_("\ +%B(%A+0x%lx): error: multiple load detected in non-last IT block instruction : STM32L4XX veneer cannot be generated.\n" + "Use gcc option -mrestrict-it to generate only one instruction per IT block.\n"), + abfd, sec, (long)i); + } + else + { + elf32_stm32l4xx_erratum_list *newerr = + (elf32_stm32l4xx_erratum_list *) + bfd_zmalloc + (sizeof (elf32_stm32l4xx_erratum_list)); + + elf32_arm_section_data (sec) + ->stm32l4xx_erratumcount += 1; + newerr->u.b.insn = insn; + /* We create only thumb branches. */ + newerr->type = + STM32L4XX_ERRATUM_BRANCH_TO_VENEER; + record_stm32l4xx_erratum_veneer + (link_info, newerr, abfd, sec, + i, + is_ldm ? + STM32L4XX_ERRATUM_LDM_VENEER_SIZE: + STM32L4XX_ERRATUM_VLDM_VENEER_SIZE); + newerr->vma = -1; + newerr->next = sec_data->stm32l4xx_erratumlist; + sec_data->stm32l4xx_erratumlist = newerr; + } + } + } + else + { + /* A7.7.37 IT p208 + IT blocks are only encoded in T1 + Encoding T1: IT{x{y{z}}} + 1 0 1 1 - 1 1 1 1 - firstcond - mask + if mask = '0000' then see 'related encodings' + We don't deal with UNPREDICTABLE, just ignore these. + There can be no nested IT blocks so an IT block + is naturally a new one for which it is worth + computing its size. */ + bfd_boolean is_newitblock = ((insn & 0xff00) == 0xbf00) && + ((insn & 0x000f) != 0x0000); + /* If we have a new IT block we compute its size. */ + if (is_newitblock) + { + /* Compute the number of instructions controlled + by the IT block, it will be used to decide + whether we are inside an IT block or not. */ + unsigned int mask = insn & 0x000f; + itblock_current_pos = 4 - ctz (mask); + } + } + + i += insn_32bit ? 
4 : 2; + } + } + + if (contents != NULL + && elf_section_data (sec)->this_hdr.contents != contents) + free (contents); + contents = NULL; + } + + return TRUE; + +error_return: + if (contents != NULL + && elf_section_data (sec)->this_hdr.contents != contents) + free (contents); + + return FALSE; +} /* Set target relocation values needed during linking. */ @@ -7020,6 +7622,7 @@ bfd_elf32_arm_set_target_relocs (struct bfd *output_bfd, int fix_v4bx, int use_blx, bfd_arm_vfp11_fix vfp11_fix, + bfd_arm_stm32l4xx_fix stm32l4xx_fix, int no_enum_warn, int no_wchar_warn, int pic_veneer, int fix_cortex_a8, int fix_arm1176) @@ -7045,6 +7648,7 @@ bfd_elf32_arm_set_target_relocs (struct bfd *output_bfd, globals->fix_v4bx = fix_v4bx; globals->use_blx |= use_blx; globals->vfp11_fix = vfp11_fix; + globals->stm32l4xx_fix = stm32l4xx_fix; globals->pic_veneer = pic_veneer; globals->fix_cortex_a8 = fix_cortex_a8; globals->fix_arm1176 = fix_arm1176; @@ -11175,6 +11779,11 @@ elf32_arm_final_link (bfd *abfd, struct bfd_link_info *info) VFP11_ERRATUM_VENEER_SECTION_NAME)) return FALSE; + if (! elf32_arm_output_glue_section (info, abfd, + globals->bfd_of_glue_owner, + STM32L4XX_ERRATUM_VENEER_SECTION_NAME)) + return FALSE; + if (! elf32_arm_output_glue_section (info, abfd, globals->bfd_of_glue_owner, ARM_BX_GLUE_SECTION_NAME)) @@ -13987,7 +14596,8 @@ elf32_arm_size_dynamic_sections (bfd * output_bfd ATTRIBUTE_UNUSED, bfd_elf32_arm_init_maps (ibfd); if (!bfd_elf32_arm_process_before_allocation (ibfd, info) - || !bfd_elf32_arm_vfp11_erratum_scan (ibfd, info)) + || !bfd_elf32_arm_vfp11_erratum_scan (ibfd, info) + || !bfd_elf32_arm_stm32l4xx_erratum_scan (ibfd, info)) /* xgettext:c-format */ _bfd_error_handler (_("Errors encountered processing file %s"), ibfd->filename); @@ -15539,6 +16149,741 @@ make_branch_to_a8_stub (struct bfd_hash_entry *gen_entry, return TRUE; } +/* Beginning of stm32l4xx work-around. */ + +/* Functions encoding instructions necessary for the emission of the + fix-stm32l4xx-629360. + Encoding is extracted from the + ARM (C) Architecture Reference Manual + ARMv7-A and ARMv7-R edition + ARM DDI 0406C.b (ID072512). */ + +static inline bfd_vma +create_instruction_branch_absolute (const void *const from, + const void *const to) +{ + /* A8.8.18 B (A8-334) + B target_address (Encoding T4). */ + /* 1111 - 0Sii - iiii - iiii - 10J1 - Jiii - iiii - iiii. */ + /* jump offset is: S:I1:I2:imm10:imm11:0. */ + /* with : I1 = NOT (J1 EOR S) I2 = NOT (J2 EOR S). */ + + int branch_offset = to - (from + 4); + int s = ((branch_offset & 0x1000000) >> 24); + int j1 = s ^ !((branch_offset & 0x800000) >> 23); + int j2 = s ^ !((branch_offset & 0x400000) >> 22); + + if (branch_offset < -(1 << 24) || branch_offset >= (1 << 24)) + BFD_ASSERT (0 && "Error: branch out of range. Cannot create branch."); + + bfd_vma patched_inst = 0xf0009000 + | s << 26 /* S. */ + | (((unsigned long) (branch_offset) >> 12) & 0x3ff) << 16 /* imm10. */ + | j1 << 13 /* J1. */ + | j2 << 11 /* J2. */ + | (((unsigned long) (branch_offset) >> 1) & 0x7ff); /* imm11. */ + + return patched_inst; +} + +static inline bfd_vma +create_instruction_ldmia (int base_reg, int wback, int reg_mask) +{ + /* A8.8.57 LDM/LDMIA/LDMFD (A8-396) + LDMIA Rn!, {Ra, Rb, Rc, ...} (Encoding T2). 
*/ + bfd_vma patched_inst = 0xe8900000 + | (/*W=*/wback << 21) + | (base_reg << 16) + | (reg_mask & 0x0000ffff); + + return patched_inst; +} + +static inline bfd_vma +create_instruction_ldmdb (int base_reg, int wback, int reg_mask) +{ + /* A8.8.60 LDMDB/LDMEA (A8-402) + LDMDB Rn!, {Ra, Rb, Rc, ...} (Encoding T1). */ + bfd_vma patched_inst = 0xe9100000 + | (/*W=*/wback << 21) + | (base_reg << 16) + | (reg_mask & 0x0000ffff); + + return patched_inst; +} + +static inline bfd_vma +create_instruction_mov (int target_reg, int source_reg) +{ + /* A8.8.103 MOV (register) (A8-486) + MOV Rd, Rm (Encoding T1). */ + bfd_vma patched_inst = 0x4600 + | (target_reg & 0x7) + | ((target_reg & 0x8) >> 3) << 7 + | (source_reg << 3); + + return patched_inst; +} + +static inline bfd_vma +create_instruction_sub (int target_reg, int source_reg, int value) +{ + /* A8.8.221 SUB (immediate) (A8-708) + SUB Rd, Rn, #value (Encoding T3). */ + bfd_vma patched_inst = 0xf1a00000 + | (target_reg << 8) + | (source_reg << 16) + | (/*S=*/0 << 20) + | ((value & 0x800) >> 11) << 26 + | ((value & 0x700) >> 8) << 12 + | (value & 0x0ff); + + return patched_inst; +} + +static inline bfd_vma +create_instruction_vldmia (int base_reg, int wback, int num_regs, + int first_reg) +{ + /* A8.8.332 VLDM (A8-922) + VLMD{MODE} Rn{!}, {list} (Encoding T2). */ + bfd_vma patched_inst = 0xec900a00 + | (/*W=*/wback << 21) + | (base_reg << 16) + | (num_regs & 0x000000ff) + | (((unsigned)first_reg>>1) & 0x0000000f) << 12 + | (first_reg & 0x00000001) << 22; + + return patched_inst; +} + +static inline bfd_vma +create_instruction_vldmdb (int base_reg, int num_regs, int first_reg) +{ + /* A8.8.332 VLDM (A8-922) + VLMD{MODE} Rn!, {} (Encoding T2). */ + bfd_vma patched_inst = 0xed300a00 + | (base_reg << 16) + | (num_regs & 0x000000ff) + | (((unsigned)first_reg>>1) & 0x0000000f) << 12 + | (first_reg & 0x00000001) << 22; + + return patched_inst; +} + +static inline bfd_vma +create_instruction_udf_w (int value) +{ + /* A8.8.247 UDF (A8-758) + Undefined (Encoding T2). */ + bfd_vma patched_inst = 0xf7f0a000 + | (value & 0x00000fff) + | (value & 0x000f0000) << 16; + + return patched_inst; +} + +static inline bfd_vma +create_instruction_udf (int value) +{ + /* A8.8.247 UDF (A8-758) + Undefined (Encoding T1). */ + bfd_vma patched_inst = 0xde00 + | (value & 0xff); + + return patched_inst; +} + +/* Functions writing an instruction in memory, returning the next + memory position to write to. */ + +static inline bfd_byte * +push_thumb2_insn32 (struct elf32_arm_link_hash_table * htab, + bfd * output_bfd, bfd_byte *pt, insn32 insn) +{ + put_thumb2_insn (htab, output_bfd, insn, pt); + return pt + 4; +} + +static inline bfd_byte * +push_thumb2_insn16 (struct elf32_arm_link_hash_table * htab, + bfd * output_bfd, bfd_byte *pt, insn32 insn) +{ + put_thumb_insn (htab, output_bfd, insn, pt); + return pt + 2; +} + +/* Function filling up a region in memory with T1 and T2 UDFs taking + care of alignment. */ + +static bfd_byte * +stm32l4xx_fill_stub_udf (struct elf32_arm_link_hash_table * htab, + bfd * output_bfd, + const bfd_byte * const base_stub_contents, + bfd_byte * const from_stub_contents, + const bfd_byte * const end_stub_contents) +{ + bfd_byte *current_stub_contents = from_stub_contents; + + /* Fill the remaining of the stub with deterministic contents : UDF + instructions. + Check if realignment is needed on modulo 4 frontier using T1, to + further use T2. 
*/ + if ((current_stub_contents < end_stub_contents) + && !((current_stub_contents - base_stub_contents) % 2) + && ((current_stub_contents - base_stub_contents) % 4)) + current_stub_contents = + push_thumb2_insn16 (htab, output_bfd, current_stub_contents, + create_instruction_udf (0)); + + for (; current_stub_contents < end_stub_contents;) + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_udf_w (0)); + + return current_stub_contents; +} + +/* Functions writing the stream of instructions equivalent to the + derived sequence for ldmia, ldmdb, vldm respectively. */ + +static void +stm32l4xx_create_replacing_stub_ldmia (struct elf32_arm_link_hash_table * htab, + bfd * output_bfd, + const insn32 initial_insn, + const bfd_byte *const initial_insn_addr, + bfd_byte *const base_stub_contents) +{ + int wback = (initial_insn & 0x00200000) >> 21; + int ri, rn = (initial_insn & 0x000F0000) >> 16; + int insn_all_registers = initial_insn & 0x0000ffff; + int insn_low_registers, insn_high_registers; + int usable_register_mask; + int nb_registers = popcount (insn_all_registers); + int restore_pc = (insn_all_registers & (1 << 15)) ? 1 : 0; + int restore_rn = (insn_all_registers & (1 << rn)) ? 1 : 0; + bfd_byte *current_stub_contents = base_stub_contents; + + BFD_ASSERT (is_thumb2_ldmia (initial_insn)); + + /* In BFD_ARM_STM32L4XX_FIX_ALL mode we may have to deal with + smaller than 8 registers load sequences that do not cause the + hardware issue. */ + if (nb_registers <= 8) + { + /* UNTOUCHED : LDMIA Rn{!}, {R-all-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + initial_insn); + + /* B initial_insn_addr+4. */ + if (!restore_pc) + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_branch_absolute + (current_stub_contents, + (char*)initial_insn_addr + 4)); + + /* Fill the remaining of the stub with deterministic contents. */ + current_stub_contents = + stm32l4xx_fill_stub_udf (htab, output_bfd, + base_stub_contents, current_stub_contents, + base_stub_contents + + STM32L4XX_ERRATUM_LDM_VENEER_SIZE); + + return; + } + + /* - reg_list[13] == 0. */ + BFD_ASSERT ((insn_all_registers & (1 << 13))==0); + + /* - reg_list[14] & reg_list[15] != 1. */ + BFD_ASSERT ((insn_all_registers & 0xC000) != 0xC000); + + /* - if (wback==1) reg_list[rn] == 0. */ + BFD_ASSERT (!wback || !restore_rn); + + /* - nb_registers > 8. */ + BFD_ASSERT (popcount (insn_all_registers) > 8); + + /* At this point, LDMxx initial insn loads between 9 and 14 registers. */ + + /* In the following algorithm, we split this wide LDM using 2 LDM insns: + - One with the 7 lowest registers (register mask 0x007F) + This LDM will finally contain between 2 and 7 registers + - One with the 7 highest registers (register mask 0xDF80) + This ldm will finally contain between 2 and 7 registers. */ + insn_low_registers = insn_all_registers & 0x007F; + insn_high_registers = insn_all_registers & 0xDF80; + + /* A spare register may be needed during this veneer to temporarily + handle the base register. This register will be restored with the + last LDM operation. + The usable register may be any general purpose register (that + excludes PC, SP, LR : register mask is 0x1FFF). */ + usable_register_mask = 0x1FFF; + + /* Generate the stub function. */ + if (wback) + { + /* LDMIA Rn!, {R-low-register-list} : (Encoding T2). 
*/ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (rn, /*wback=*/1, insn_low_registers)); + + /* LDMIA Rn!, {R-high-register-list} : (Encoding T2). */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (rn, /*wback=*/1, insn_high_registers)); + if (!restore_pc) + { + /* B initial_insn_addr+4. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_branch_absolute + (current_stub_contents, + (char *) initial_insn_addr + 4)); + } + } + else /* if (!wback). */ + { + ri = rn; + + /* If Rn is not part of the high-register-list, move it there. */ + if (!(insn_high_registers & (1 << rn))) + { + /* Choose a Ri in the high-register-list that will be restored. */ + ri = ctz (insn_high_registers & usable_register_mask & ~(1 << rn)); + + /* MOV Ri, Rn. */ + current_stub_contents = + push_thumb2_insn16 (htab, output_bfd, current_stub_contents, + create_instruction_mov (ri, rn)); + } + + /* LDMIA Ri!, {R-low-register-list} : (Encoding T2). */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (ri, /*wback=*/1, insn_low_registers)); + + /* LDMIA Ri, {R-high-register-list} : (Encoding T2). */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (ri, /*wback=*/0, insn_high_registers)); + + if (!restore_pc) + { + /* B initial_insn_addr+4. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_branch_absolute + (current_stub_contents, + (char *) initial_insn_addr + 4)); + } + } + + /* Fill the remaining of the stub with deterministic contents. */ + current_stub_contents = + stm32l4xx_fill_stub_udf (htab, output_bfd, + base_stub_contents, current_stub_contents, + base_stub_contents + + STM32L4XX_ERRATUM_LDM_VENEER_SIZE); +} + +static void +stm32l4xx_create_replacing_stub_ldmdb (struct elf32_arm_link_hash_table * htab, + bfd * output_bfd, + const insn32 initial_insn, + const bfd_byte *const initial_insn_addr, + bfd_byte *const base_stub_contents) +{ + int wback = (initial_insn & 0x00200000) >> 21; + int ri, rn = (initial_insn & 0x000f0000) >> 16; + int insn_all_registers = initial_insn & 0x0000ffff; + int insn_low_registers, insn_high_registers; + int usable_register_mask; + int restore_pc = (insn_all_registers & (1 << 15)) ? 1 : 0; + int restore_rn = (insn_all_registers & (1 << rn)) ? 1 : 0; + int nb_registers = popcount (insn_all_registers); + bfd_byte *current_stub_contents = base_stub_contents; + + BFD_ASSERT (is_thumb2_ldmdb (initial_insn)); + + /* In BFD_ARM_STM32L4XX_FIX_ALL mode we may have to deal with + smaller than 8 registers load sequences that do not cause the + hardware issue. */ + if (nb_registers <= 8) + { + /* UNTOUCHED : LDMIA Rn{!}, {R-all-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + initial_insn); + + /* B initial_insn_addr+4. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_branch_absolute + (current_stub_contents, + (char*)initial_insn_addr + 4)); + + /* Fill the remaining of the stub with deterministic contents. 
*/ + current_stub_contents = + stm32l4xx_fill_stub_udf (htab, output_bfd, + base_stub_contents, current_stub_contents, + base_stub_contents + + STM32L4XX_ERRATUM_LDM_VENEER_SIZE); + + return; + } + + /* - reg_list[13] == 0. */ + BFD_ASSERT ((insn_all_registers & (1 << 13)) == 0); + + /* - reg_list[14] & reg_list[15] != 1. */ + BFD_ASSERT ((insn_all_registers & 0xC000) != 0xC000); + + /* - if (wback==1) reg_list[rn] == 0. */ + BFD_ASSERT (!wback || !restore_rn); + + /* - nb_registers > 8. */ + BFD_ASSERT (popcount (insn_all_registers) > 8); + + /* At this point, LDMxx initial insn loads between 9 and 14 registers. */ + + /* In the following algorithm, we split this wide LDM using 2 LDM insn: + - One with the 7 lowest registers (register mask 0x007F) + This LDM will finally contain between 2 and 7 registers + - One with the 7 highest registers (register mask 0xDF80) + This ldm will finally contain between 2 and 7 registers. */ + insn_low_registers = insn_all_registers & 0x007F; + insn_high_registers = insn_all_registers & 0xDF80; + + /* A spare register may be needed during this veneer to temporarily + handle the base register. This register will be restored with + the last LDM operation. + The usable register may be any general purpose register (that excludes + PC, SP, LR : register mask is 0x1FFF). */ + usable_register_mask = 0x1FFF; + + /* Generate the stub function. */ + if (!wback && !restore_pc && !restore_rn) + { + /* Choose a Ri in the low-register-list that will be restored. */ + ri = ctz (insn_low_registers & usable_register_mask & ~(1 << rn)); + + /* MOV Ri, Rn. */ + current_stub_contents = + push_thumb2_insn16 (htab, output_bfd, current_stub_contents, + create_instruction_mov (ri, rn)); + + /* LDMDB Ri!, {R-high-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmdb + (ri, /*wback=*/1, insn_high_registers)); + + /* LDMDB Ri, {R-low-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmdb + (ri, /*wback=*/0, insn_low_registers)); + + /* B initial_insn_addr+4. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_branch_absolute + (current_stub_contents, + (char*)initial_insn_addr + 4)); + } + else if (wback && !restore_pc && !restore_rn) + { + /* LDMDB Rn!, {R-high-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmdb + (rn, /*wback=*/1, insn_high_registers)); + + /* LDMDB Rn!, {R-low-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmdb + (rn, /*wback=*/1, insn_low_registers)); + + /* B initial_insn_addr+4. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_branch_absolute + (current_stub_contents, + (char*)initial_insn_addr + 4)); + } + else if (!wback && restore_pc && !restore_rn) + { + /* Choose a Ri in the high-register-list that will be restored. */ + ri = ctz (insn_high_registers & usable_register_mask & ~(1 << rn)); + + /* SUB Ri, Rn, #(4*nb_registers). */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_sub (ri, rn, (4 * nb_registers))); + + /* LDMIA Ri!, {R-low-register-list}. 
*/ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (ri, /*wback=*/1, insn_low_registers)); + + /* LDMIA Ri, {R-high-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (ri, /*wback=*/0, insn_high_registers)); + } + else if (wback && restore_pc && !restore_rn) + { + /* Choose a Ri in the high-register-list that will be restored. */ + ri = ctz (insn_high_registers & usable_register_mask & ~(1 << rn)); + + /* SUB Rn, Rn, #(4*nb_registers) */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_sub (rn, rn, (4 * nb_registers))); + + /* MOV Ri, Rn. */ + current_stub_contents = + push_thumb2_insn16 (htab, output_bfd, current_stub_contents, + create_instruction_mov (ri, rn)); + + /* LDMIA Ri!, {R-low-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (ri, /*wback=*/1, insn_low_registers)); + + /* LDMIA Ri, {R-high-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (ri, /*wback=*/0, insn_high_registers)); + } + else if (!wback && !restore_pc && restore_rn) + { + ri = rn; + if (!(insn_low_registers & (1 << rn))) + { + /* Choose a Ri in the low-register-list that will be restored. */ + ri = ctz (insn_low_registers & usable_register_mask & ~(1 << rn)); + + /* MOV Ri, Rn. */ + current_stub_contents = + push_thumb2_insn16 (htab, output_bfd, current_stub_contents, + create_instruction_mov (ri, rn)); + } + + /* LDMDB Ri!, {R-high-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmdb + (ri, /*wback=*/1, insn_high_registers)); + + /* LDMDB Ri, {R-low-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmdb + (ri, /*wback=*/0, insn_low_registers)); + + /* B initial_insn_addr+4. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_branch_absolute + (current_stub_contents, + (char *) initial_insn_addr + 4)); + } + else if (!wback && restore_pc && restore_rn) + { + ri = rn; + if (!(insn_high_registers & (1 << rn))) + { + /* Choose a Ri in the high-register-list that will be restored. */ + ri = ctz (insn_high_registers & usable_register_mask & ~(1 << rn)); + } + + /* SUB Ri, Rn, #(4*nb_registers). */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_sub (ri, rn, (4 * nb_registers))); + + /* LDMIA Ri!, {R-low-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (ri, /*wback=*/1, insn_low_registers)); + + /* LDMIA Ri, {R-high-register-list}. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_ldmia + (ri, /*wback=*/0, insn_high_registers)); + } + else if (wback && restore_rn) + { + /* The assembler should not have accepted to encode this. */ + BFD_ASSERT (0 && "Cannot patch an instruction that has an " + "undefined behavior.\n"); + } + + /* Fill the remaining of the stub with deterministic contents. 
*/ + current_stub_contents = + stm32l4xx_fill_stub_udf (htab, output_bfd, + base_stub_contents, current_stub_contents, + base_stub_contents + + STM32L4XX_ERRATUM_LDM_VENEER_SIZE); + +} + +static void +stm32l4xx_create_replacing_stub_vldm (struct elf32_arm_link_hash_table * htab, + bfd * output_bfd, + const insn32 initial_insn, + const bfd_byte *const initial_insn_addr, + bfd_byte *const base_stub_contents) +{ + int num_regs = ((unsigned int)initial_insn << 24) >> 24; + bfd_byte *current_stub_contents = base_stub_contents; + + BFD_ASSERT (is_thumb2_vldm (initial_insn)); + + /* In BFD_ARM_STM32L4XX_FIX_ALL mode we may have to deal with + smaller than 8 registers load sequences that do not cause the + hardware issue. */ + if (num_regs <= 8) + { + /* Untouched instruction. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + initial_insn); + + /* B initial_insn_addr+4. */ + current_stub_contents = + push_thumb2_insn32 (htab, output_bfd, current_stub_contents, + create_instruction_branch_absolute + (current_stub_contents, + (char*)initial_insn_addr + 4)); + } + else + { + bfd_boolean is_ia_nobang = /* (IA without !). */ + (((initial_insn << 7) >> 28) & 0xd) == 0x4; + bfd_boolean is_ia_bang = /* (IA with !) - includes VPOP. */ + (((initial_insn << 7) >> 28) & 0xd) == 0x5; + bfd_boolean is_db_bang = /* (DB with !). */ + (((initial_insn << 7) >> 28) & 0xd) == 0x9; + int base_reg = ((unsigned int)initial_insn << 12) >> 28; + /* d = UInt (Vd:D);. */ + int first_reg = ((((unsigned int)initial_insn << 16) >> 28) << 1) + | (((unsigned int)initial_insn << 9) >> 31); + + /* Compute the number of 8-register chunks needed to split. */ + int chunks = (num_regs%8) ? (num_regs/8 + 1) : (num_regs/8); + int chunk; + + /* The test coverage has been done assuming the following + hypothesis that exactly one of the previous is_ predicates is + true. */ + BFD_ASSERT ((is_ia_nobang ^ is_ia_bang ^ is_db_bang) && + !(is_ia_nobang & is_ia_bang & is_db_bang)); + + /* We treat the cutting of the register in one pass for all + cases, then we emit the adjustments: + + vldm rx, {...} + -> vldm rx!, {8_words_or_less} for each needed 8_word + -> sub rx, rx, #size (list) + + vldm rx!, {...} + -> vldm rx!, {8_words_or_less} for each needed 8_word + This also handles vpop instruction (when rx is sp) + + vldmd rx!, {...} + -> vldmb rx!, {8_words_or_less} for each needed 8_word. */ + for (chunk = 0; chunkoutput_section->vma + sec->output_offset; @@ -15647,6 +16993,89 @@ elf32_arm_write_section (bfd *output_bfd, } } + if (arm_data->stm32l4xx_erratumcount != 0) + { + for (stm32l4xx_errnode = arm_data->stm32l4xx_erratumlist; + stm32l4xx_errnode != 0; + stm32l4xx_errnode = stm32l4xx_errnode->next) + { + bfd_vma target = stm32l4xx_errnode->vma - offset; + + switch (stm32l4xx_errnode->type) + { + case STM32L4XX_ERRATUM_BRANCH_TO_VENEER: + { + unsigned int insn; + bfd_vma branch_to_veneer = + stm32l4xx_errnode->u.b.veneer->vma - stm32l4xx_errnode->vma; + + if ((signed) branch_to_veneer < -(1 << 24) + || (signed) branch_to_veneer >= (1 << 24)) + { + bfd_vma out_of_range = + ((signed) branch_to_veneer < -(1 << 24)) ? + - branch_to_veneer - (1 << 24) : + ((signed) branch_to_veneer >= (1 << 24)) ? + branch_to_veneer - (1 << 24) : 0; + + (*_bfd_error_handler) + (_("%B(%#x): error: Cannot create STM32L4XX veneer. " + "Jump out of range by %d bytes. " + "Cannot encode branch instruction. 
"), + output_bfd, + stm32l4xx_errnode->vma - 4, + out_of_range); + continue; + } + + insn = create_instruction_branch_absolute + ((void *) stm32l4xx_errnode->vma-4, + (void *) stm32l4xx_errnode->u.b.veneer->vma); + + /* The instruction is before the label. */ + target -= 4; + + put_thumb2_insn (globals, output_bfd, + (bfd_vma) insn, contents + target); + } + break; + + case STM32L4XX_ERRATUM_VENEER: + { + bfd_vma veneer, veneer_r; + unsigned int insn; + + veneer = (bfd_vma) (contents + target); + veneer_r = (bfd_vma) (contents + target + + stm32l4xx_errnode->u.b.veneer->vma - + stm32l4xx_errnode->vma - 4); + + if ((signed) (veneer_r - veneer - + STM32L4XX_ERRATUM_VLDM_VENEER_SIZE > + STM32L4XX_ERRATUM_LDM_VENEER_SIZE ? + STM32L4XX_ERRATUM_VLDM_VENEER_SIZE : + STM32L4XX_ERRATUM_LDM_VENEER_SIZE) < -(1 << 24) + || (signed) (veneer_r - veneer) >= (1 << 24)) + { + (*_bfd_error_handler) (_("%B: error: Cannot create STM32L4XX " + "veneer."), output_bfd); + continue; + } + + /* Original instruction. */ + insn = stm32l4xx_errnode->u.v.branch->u.b.insn; + + stm32l4xx_create_replacing_stub + (globals, output_bfd, insn, (void*)veneer_r, (void*)veneer); + } + break; + + default: + abort (); + } + } + } + if (arm_data->elf.this_hdr.sh_type == SHT_ARM_EXIDX) { arm_unwind_table_edit *edit_node @@ -15743,8 +17172,8 @@ elf32_arm_write_section (bfd *output_bfd, data.writing_section = sec; data.contents = contents; - bfd_hash_traverse (&globals->stub_hash_table, make_branch_to_a8_stub, - &data); + bfd_hash_traverse (& globals->stub_hash_table, make_branch_to_a8_stub, + & data); } if (mapcount == 0) diff --git a/ld/ChangeLog b/ld/ChangeLog index bef7b26d89..359fc19d20 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,22 @@ +2015-10-27 Laurent Alfonsi + Christophe Monat + + * ld.texinfo: Add description of the STM32L4xx erratum + workaround. + * NEWS: Mention the new feature. + * emultempl/armelf.em (stm32l4xx_fix): New. + (arm_elf_before_allocation): Choose the type of fix, scan for + erratum. + (gld${EMULATION_NAME}_finish): Fix veneer locations. + (arm_elf_create_output_section_statements): Propagate + stm32l4xx_fix value. + (PARSE_AND_LIST_PROLOGUE): Define OPTION_STM32L4XX_FIX. + (PARSE_AND_LIST_LONGOPTS): Add entry for handling + --fix-stm32l4xx-629360. + (PARSE_AND_LIST_OPTION): Add entry for helping on + --fix-stm32l4xx-629360. + (PARSE_AND_LIST_ARGS_CASES): Treat OPTION_STM32L4XX_FIX. + 2015-10-27 Alan Modra PR ld/19175 diff --git a/ld/NEWS b/ld/NEWS index 89288d94a5..154c066045 100644 --- a/ld/NEWS +++ b/ld/NEWS @@ -1,4 +1,7 @@ -*- text -*- +* Add --fix-stm32l4xx-629360 to the ARM linker to enable a link-time + workaround for a bug in the bus matrix / memory controller for some of + the STM32 Cortex-M4 based products (STM32L4xx) * Add a configure option --enable-compressed-debug-sections={all,ld} to decide whether DWARF debug sections should be compressed by default. diff --git a/ld/emultempl/armelf.em b/ld/emultempl/armelf.em index 408d605b76..b03aed40bf 100644 --- a/ld/emultempl/armelf.em +++ b/ld/emultempl/armelf.em @@ -35,6 +35,7 @@ static char * target2_type = "${TARGET2_TYPE}"; static int fix_v4bx = 0; static int use_blx = 0; static bfd_arm_vfp11_fix vfp11_denorm_fix = BFD_ARM_VFP11_FIX_DEFAULT; +static bfd_arm_stm32l4xx_fix stm32l4xx_fix = BFD_ARM_STM32L4XX_FIX_NONE; static int fix_cortex_a8 = -1; static int no_enum_size_warning = 0; static int no_wchar_size_warning = 0; @@ -62,6 +63,10 @@ arm_elf_before_allocation (void) due to architecture version. 
*/ bfd_elf32_arm_set_vfp11_fix (link_info.output_bfd, &link_info); + /* Choose type of STM32L4XX erratum fix, or warn if specified fix is + unnecessary due to architecture version. */ + bfd_elf32_arm_set_stm32l4xx_fix (link_info.output_bfd, &link_info); + /* Auto-select Cortex-A8 erratum fix if it wasn't explicitly specified. */ bfd_elf32_arm_set_cortex_a8_fix (link_info.output_bfd, &link_info); @@ -77,7 +82,9 @@ arm_elf_before_allocation (void) if (!bfd_elf32_arm_process_before_allocation (is->the_bfd, &link_info) - || !bfd_elf32_arm_vfp11_erratum_scan (is->the_bfd, &link_info)) + || !bfd_elf32_arm_vfp11_erratum_scan (is->the_bfd, &link_info) + || !bfd_elf32_arm_stm32l4xx_erratum_scan (is->the_bfd, + &link_info)) /* xgettext:c-format */ einfo (_("Errors encountered processing file %s"), is->filename); } @@ -380,6 +387,10 @@ gld${EMULATION_NAME}_finish (void) /* Figure out where VFP11 erratum veneers (and the labels returning from same) have been placed. */ bfd_elf32_arm_vfp11_fix_veneer_locations (is->the_bfd, &link_info); + + /* Figure out where STM32L4XX erratum veneers (and the labels returning + from them) have been placed. */ + bfd_elf32_arm_stm32l4xx_fix_veneer_locations (is->the_bfd, &link_info); } } @@ -468,7 +479,8 @@ arm_elf_create_output_section_statements (void) bfd_elf32_arm_set_target_relocs (link_info.output_bfd, &link_info, target1_is_rel, target2_type, fix_v4bx, use_blx, - vfp11_denorm_fix, no_enum_size_warning, + vfp11_denorm_fix, stm32l4xx_fix, + no_enum_size_warning, no_wchar_size_warning, pic_veneer, fix_cortex_a8, fix_arm1176); @@ -539,6 +551,7 @@ PARSE_AND_LIST_PROLOGUE=' #define OPTION_FIX_ARM1176 317 #define OPTION_NO_FIX_ARM1176 318 #define OPTION_LONG_PLT 319 +#define OPTION_STM32L4XX_FIX 320 ' PARSE_AND_LIST_SHORTOPTS=p @@ -554,6 +567,7 @@ PARSE_AND_LIST_LONGOPTS=' { "fix-v4bx-interworking", no_argument, NULL, OPTION_FIX_V4BX_INTERWORKING}, { "use-blx", no_argument, NULL, OPTION_USE_BLX}, { "vfp11-denorm-fix", required_argument, NULL, OPTION_VFP11_DENORM_FIX}, + { "fix-stm32l4xx-629360", optional_argument, NULL, OPTION_STM32L4XX_FIX}, { "no-enum-size-warning", no_argument, NULL, OPTION_NO_ENUM_SIZE_WARNING}, { "pic-veneer", no_argument, NULL, OPTION_PIC_VENEER}, { "stub-group-size", required_argument, NULL, OPTION_STUBGROUP_SIZE }, @@ -576,6 +590,7 @@ PARSE_AND_LIST_OPTIONS=' fprintf (file, _(" --fix-v4bx-interworking Rewrite BX rn branch to ARMv4 interworking veneer\n")); fprintf (file, _(" --use-blx Enable use of BLX instructions\n")); fprintf (file, _(" --vfp11-denorm-fix Specify how to fix VFP11 denorm erratum\n")); + fprintf (file, _(" --fix-stm32l4xx-629360 Specify how to fix STM32L4XX 629360 erratum\n")); fprintf (file, _(" --no-enum-size-warning Don'\''t warn about objects with incompatible\n" " enum sizes\n")); fprintf (file, _(" --no-wchar-size-warning Don'\''t warn about objects with incompatible\n" @@ -645,6 +660,19 @@ PARSE_AND_LIST_ARGS_CASES=' einfo (_("Unrecognized VFP11 fix type '\''%s'\''.\n"), optarg); break; + case OPTION_STM32L4XX_FIX: + if (!optarg) + stm32l4xx_fix = BFD_ARM_STM32L4XX_FIX_DEFAULT; + else if (strcmp (optarg, "none") == 0) + stm32l4xx_fix = BFD_ARM_STM32L4XX_FIX_NONE; + else if (strcmp (optarg, "default") == 0) + stm32l4xx_fix = BFD_ARM_STM32L4XX_FIX_DEFAULT; + else if (strcmp (optarg, "all") == 0) + stm32l4xx_fix = BFD_ARM_STM32L4XX_FIX_ALL; + else + einfo (_("Unrecognized STM32L4XX fix type '\''%s'\''.\n"), optarg); + break; + case OPTION_NO_ENUM_SIZE_WARNING: no_enum_size_warning = 1; break; diff --git a/ld/ld.texinfo 
b/ld/ld.texinfo index 0c3a55184c..8374ccf6a2 100644 --- a/ld/ld.texinfo +++ b/ld/ld.texinfo @@ -6681,6 +6681,48 @@ Further information is available in the ``ARM1176JZ-S and ARM1176JZF-S Programmer Advice Notice'' available on the ARM documentation website at: http://infocenter.arm.com/. +@cindex STM32L4xx erratum workaround +@kindex --fix-stm32l4xx-629360 + +The @samp{--fix-stm32l4xx-629360} switch enables a link-time +workaround for a bug in the bus matrix / memory controller of some of +the STM32 Cortex-M4 based products (STM32L4xx). When off-chip memory +is accessed via the affected bus with a bus read of 9 words or more, +the bus can generate corrupt data and/or abort. Only core-initiated +accesses (not DMA) are affected, and any multi-word load can trigger +the problem: integer loads such as LDM and POP, and floating-point +loads such as VLDM and VPOP. Stores are not affected. + +The bug can be avoided by splitting memory accesses into chunks small +enough to keep each bus read to 8 words or fewer. + +The workaround is not enabled by default; this is equivalent to using +@samp{--fix-stm32l4xx-629360=none}. If you know you are using buggy +STM32L4xx hardware, you can enable the workaround by specifying the +linker option @samp{--fix-stm32l4xx-629360}, or the equivalent +@samp{--fix-stm32l4xx-629360=default}. + +If the workaround is enabled, instructions are scanned for +potentially troublesome sequences, and a veneer is created for each +sequence that may trigger the erratum. The veneer consists of a +replacement sequence that emulates the behaviour of the original +instruction, followed by a branch back to the subsequent instruction. +The original instruction is then replaced with a branch to the veneer. + +The workaround does not always preserve the memory access order of +the LDMDB instruction when the instruction loads the PC. + +The workaround cannot handle a problematic instruction that sits in +the middle of an IT block, since a branch is not allowed there. In +that case, the linker reports a warning and no replacement occurs. + +The workaround cannot replace a problematic instruction with a +PC-relative branch if the @samp{.text} section is too large for the +branch to be encoded. In that case, the linker reports a warning and +no replacement occurs. + @cindex NO_ENUM_SIZE_WARNING @kindex --no-enum-size-warning The @option{--no-enum-size-warning} switch prevents the linker from diff --git a/ld/testsuite/ChangeLog b/ld/testsuite/ChangeLog index 4e412d6c8f..6981d70f33 100644 --- a/ld/testsuite/ChangeLog +++ b/ld/testsuite/ChangeLog @@ -1,3 +1,21 @@ +2015-10-27 Laurent Alfonsi + Christophe Monat + + * ld-arm/arm-elf.exp (armelftests_common): Add STM32L4XX + tests. + * ld-arm/stm32l4xx-cannot-fix-far-ldm.d: New. + * ld-arm/stm32l4xx-cannot-fix-far-ldm.s: Likewise. + * ld-arm/stm32l4xx-cannot-fix-it-block.d: Likewise. + * ld-arm/stm32l4xx-cannot-fix-it-block.s: Likewise. + * ld-arm/stm32l4xx-fix-all.d: Likewise. + * ld-arm/stm32l4xx-fix-all.s: Likewise. + * ld-arm/stm32l4xx-fix-it-block.d: Likewise. + * ld-arm/stm32l4xx-fix-it-block.s: Likewise. + * ld-arm/stm32l4xx-fix-ldm.d: Likewise. + * ld-arm/stm32l4xx-fix-ldm.s: Likewise. + * ld-arm/stm32l4xx-fix-vldm.d: Likewise. + * ld-arm/stm32l4xx-fix-vldm.s: Likewise. + 2015-10-27 Alan Modra * ld-gc/pr19161.d: xfail hppa-*-*.
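The documentation above states the trigger condition only informally, so here is a minimal, self-contained sketch of the test it implies: a multiple load is a problem only if it transfers more than 8 words. This is illustrative only and is not the code added by this patch; the helper names are invented, and the encodings assumed are the generic Thumb-2 ones (LDM/LDMDB/POP carry their register list in the low 16 bits of the instruction, VLDM carries the number of single-precision words in its low 8 bits).

#include <stdbool.h>
#include <stdint.h>

/* Count the registers named in the low 16 bits of a Thumb-2
   LDM/LDMDB/POP encoding.  */
static unsigned int
ldm_reg_count (uint32_t insn)
{
  unsigned int count = 0;

  for (uint32_t list = insn & 0xffffu; list != 0; list >>= 1)
    count += list & 1u;
  return count;
}

/* Return true if a multiple load transfers 9 words or more, i.e.
   falls inside the window described by erratum 629360.  For VLDM the
   low 8 bits of the encoding give the number of single-precision
   words loaded.  */
static bool
stm32l4xx_load_needs_split (uint32_t insn, bool is_vldm)
{
  unsigned int words = is_vldm ? (insn & 0xffu) : ldm_reg_count (insn);

  return words > 8;
}

This is presumably also why the stm32l4xx-fix-all test below pairs 8-word register lists with --fix-stm32l4xx-629360=all: with the default setting such loads stay under the 9-word trigger and would be left alone.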
diff --git a/ld/testsuite/ld-arm/arm-elf.exp b/ld/testsuite/ld-arm/arm-elf.exp index 882f5cb99e..1d9b1c83f4 100644 --- a/ld/testsuite/ld-arm/arm-elf.exp +++ b/ld/testsuite/ld-arm/arm-elf.exp @@ -159,6 +159,22 @@ set armelftests_common { "-EL --vfp11-denorm-fix=scalar -Ttext=0x8000" "" "-EL -mfpu=vfpxd" {vfp11-fix-none.s} {{objdump -dr vfp11-fix-none.d}} "vfp11-fix-none"} + {"STM32L4XX erratum fix LDM" + "-EL --fix-stm32l4xx-629360 -Ttext=0x8000" "" "-EL -mcpu=cortex-m4 -mfpu=fpv4-sp-d16" {stm32l4xx-fix-ldm.s} + {{objdump -dr stm32l4xx-fix-ldm.d}} + "stm32l4xx-fix-ldm"} + {"STM32L4XX erratum fix VLDM" + "-EL --fix-stm32l4xx-629360 -Ttext=0x8000" "" "-EL -mcpu=cortex-m4 -mfpu=fpv4-sp-d16" {stm32l4xx-fix-vldm.s} + {{objdump -dr stm32l4xx-fix-vldm.d}} + "stm32l4xx-fix-vldm"} + {"STM32L4XX erratum fix ALL" + "-EL --fix-stm32l4xx-629360=all -Ttext=0x8000" "" "-EL -mcpu=cortex-m4 -mfpu=fpv4-sp-d16" {stm32l4xx-fix-all.s} + {{objdump -dr stm32l4xx-fix-all.d}} + "stm32l4xx-fix-vldm-all"} + {"STM32L4XX erratum fix in IT context" + "-EL --fix-stm32l4xx-629360 -Ttext=0x8000" "" "-EL -mcpu=cortex-m4 -mfpu=fpv4-sp-d16" {stm32l4xx-fix-it-block.s} + {{objdump -dr stm32l4xx-fix-it-block.d}} + "stm32l4xx-fix-it-block"} {"Unwinding and -gc-sections" "-gc-sections" "" "" {gc-unwind.s} {{objdump -sj.data gc-unwind.d}} "gc-unwind"} @@ -905,3 +921,5 @@ if { ![istarget "arm*-*-nacl*"] } { run_dump_test "unresolved-2" run_dump_test "gc-hidden-1" run_dump_test "protected-data" +run_dump_test "stm32l4xx-cannot-fix-it-block" +run_dump_test "stm32l4xx-cannot-fix-far-ldm" diff --git a/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-far-ldm.d b/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-far-ldm.d new file mode 100644 index 0000000000..a7674d5cc9 --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-far-ldm.d @@ -0,0 +1,25 @@ +#source: stm32l4xx-cannot-fix-far-ldm.s +#as:-EL -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 +#ld:-EL --fix-stm32l4xx-629360 -Ttext=0x80000 +#objdump: -dr --prefix-addresses --show-raw-insn +#name: STM32L4XX erratum : LDM cannot be patched when LDM is too far from veneer section +#warning: .*Cannot create STM32L4XX veneer. Jump out of range by 24 bytes. Cannot encode branch instruction.* + +# Test the `LDM*' instructions when too far from the veneer section +# They cannot, thus should not, be patched + +.*: +file format .*arm.* + +Disassembly of section \.text: +00080000 <__stm32l4xx_veneer_0> 4607[[:space:]]+mov[[:space:]]+r7, r0 +00080002 <__stm32l4xx_veneer_0\+0x2> e8b7 007e[[:space:]]+ldmia\.w[[:space:]]+r7\!, {r1, r2, r3, r4, r5, r6} +00080006 <__stm32l4xx_veneer_0\+0x6> e897 0380[[:space:]]+ldmia\.w[[:space:]]+r7, {r7, r8, r9} +0008000a <__stm32l4xx_veneer_0\+0xa> f3ff 978b[[:space:]]+b\.w[[:space:]]+0107ff24 <__stm32l4xx_veneer_0_r> +0008000e <__stm32l4xx_veneer_0\+0xe> de00[[:space:]]+udf[[:space:]]+#0 + \.\.\. + \.\.\. +0107ff20 <_start\+0xffff00> f400 906e[[:space:]]+b\.w[[:space:]]+00080000 <__stm32l4xx_veneer_0> + \.\.\. 
+01080024 <__stm32l4xx_veneer_0_r\+0x100> e899 03fe[[:space:]]+ldmia\.w[[:space:]]+r9, {r1, r2, r3, r4, r5, r6, r7, r8, r9} +01080028 <__stm32l4xx_veneer_1_r> bf00[[:space:]]+nop + diff --git a/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-far-ldm.s b/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-far-ldm.s new file mode 100644 index 0000000000..7ba9a16758 --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-far-ldm.s @@ -0,0 +1,27 @@ + .syntax unified + .cpu cortex-m4 + .fpu fpv4-sp-d16 + .text + .align 1 + .thumb + .thumb_func + .global _start +_start: + .space 0xFFFF00 + + @ Multiple load, case #2 + @ ldm rx, {...} -> + @ mov ry, rx where ry is the lowest register from upper_list + @ ldm ry!, { lower_list } + @ ldm ry, { upper_list } + ldm.w r0, {r1-r9} + + .space 0x100 + + @ Check that the linker never generates a wrong branch + @ ldm rx, {...} -> ldm rx, {...} + @ Emit a warning during the link phase and keep the same instruction + + ldm.w r9, {r1-r9} + + nop diff --git a/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-it-block.d b/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-it-block.d new file mode 100644 index 0000000000..9b689f10ea --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-it-block.d @@ -0,0 +1,16 @@ +#source: stm32l4xx-cannot-fix-it-block.s +#as:-EL -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 +#ld:-EL --fix-stm32l4xx-629360 -Ttext=0x8000 +#objdump: -dr --prefix-addresses --show-raw-insn +#name: STM32L4XX erratum : LDM cannot be patched when not last in IT block +#warning: .*multiple load detected in non-last IT block instruction.* + +# Test the `LDM*' instructions when non-last in IT block +# They cannot, thus should not, be patched + +.*: +file format .*arm.* + +Disassembly of section \.text: +00008000 \<_start\> bf04[[:space:]]+itt[[:space:]]+eq +00008002 \<_start\+0x2\> e899 03fe[[:space:]]+ldmiaeq\.w[[:space:]]+r9, {r1, r2, r3, r4, r5, r6, r7, r8, r9} +00008006 \<_start\+0x6\> f3af 8000[[:space:]]+nopeq\.w diff --git a/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-it-block.s b/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-it-block.s new file mode 100644 index 0000000000..9c1638821d --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-cannot-fix-it-block.s @@ -0,0 +1,16 @@ + .syntax unified + .cpu cortex-m4 + .fpu fpv4-sp-d16 + .text + .align 1 + .thumb + .thumb_func + .global _start +_start: + @ Create a situation where a multiple-load that should be + @ patched cannot be, due to its belonging to an IT block + @ but not in last position, which is the only position + @ when a branch is valid in a IT block + itt eq + ldmeq.w r9, {r1-r9} + nop.w diff --git a/ld/testsuite/ld-arm/stm32l4xx-fix-all.d b/ld/testsuite/ld-arm/stm32l4xx-fix-all.d new file mode 100644 index 0000000000..59f3ed1c92 --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-fix-all.d @@ -0,0 +1,83 @@ + +.*: file format elf32-littlearm.* + + +Disassembly of section \.text: + +00008000 <__stm32l4xx_veneer_0>: + 8000: e899 01fe ldmia\.w r9, {r1, r2, r3, r4, r5, r6, r7, r8} + 8004: f000 b84a b\.w 809c <__stm32l4xx_veneer_0_r> + 8008: f7f0 a000 udf\.w #0 + 800c: f7f0 a000 udf\.w #0 + +00008010 <__stm32l4xx_veneer_1>: + 8010: e8b9 01fe ldmia\.w r9!, {r1, r2, r3, r4, r5, r6, r7, r8} + 8014: f000 b844 b\.w 80a0 <__stm32l4xx_veneer_1_r> + 8018: f7f0 a000 udf\.w #0 + 801c: f7f0 a000 udf\.w #0 + +00008020 <__stm32l4xx_veneer_2>: + 8020: e919 01fe ldmdb r9, {r1, r2, r3, r4, r5, r6, r7, r8} + 8024: f000 b83e b\.w 80a4 <__stm32l4xx_veneer_2_r> + 8028: f7f0 a000 udf\.w #0 + 802c: f7f0 a000 udf\.w #0 + +00008030 <__stm32l4xx_veneer_3>: + 8030: e939 
01fe ldmdb r9!, {r1, r2, r3, r4, r5, r6, r7, r8} + 8034: f000 b838 b\.w 80a8 <__stm32l4xx_veneer_3_r> + 8038: f7f0 a000 udf\.w #0 + 803c: f7f0 a000 udf\.w #0 + +00008040 <__stm32l4xx_veneer_4>: + 8040: e8bd 01fe ldmia\.w sp!, {r1, r2, r3, r4, r5, r6, r7, r8} + 8044: f000 b832 b\.w 80ac <__stm32l4xx_veneer_4_r> + 8048: f7f0 a000 udf\.w #0 + 804c: f7f0 a000 udf\.w #0 + +00008050 <__stm32l4xx_veneer_5>: + 8050: ecd9 0a08 vldmia r9, {s1-s8} + 8054: f000 b82c b\.w 80b0 <__stm32l4xx_veneer_5_r> + 8058: f7f0 a000 udf\.w #0 + 805c: f7f0 a000 udf\.w #0 + 8060: f7f0 a000 udf\.w #0 + 8064: f7f0 a000 udf\.w #0 + +00008068 <__stm32l4xx_veneer_6>: + 8068: ecf6 4a08 vldmia r6!, {s9-s16} + 806c: f000 b822 b\.w 80b4 <__stm32l4xx_veneer_6_r> + 8070: f7f0 a000 udf\.w #0 + 8074: f7f0 a000 udf\.w #0 + 8078: f7f0 a000 udf\.w #0 + 807c: f7f0 a000 udf\.w #0 + +00008080 <__stm32l4xx_veneer_7>: + 8080: ecfd 0a08 vpop {s1-s8} + 8084: f000 b818 b\.w 80b8 <__stm32l4xx_veneer_7_r> + 8088: f7f0 a000 udf\.w #0 + 808c: f7f0 a000 udf\.w #0 + 8090: f7f0 a000 udf\.w #0 + 8094: f7f0 a000 udf\.w #0 + +00008098 <_start>: + 8098: f7ff bfb2 b\.w 8000 <__stm32l4xx_veneer_0> + +0000809c <__stm32l4xx_veneer_0_r>: + 809c: f7ff bfb8 b\.w 8010 <__stm32l4xx_veneer_1> + +000080a0 <__stm32l4xx_veneer_1_r>: + 80a0: f7ff bfbe b\.w 8020 <__stm32l4xx_veneer_2> + +000080a4 <__stm32l4xx_veneer_2_r>: + 80a4: f7ff bfc4 b\.w 8030 <__stm32l4xx_veneer_3> + +000080a8 <__stm32l4xx_veneer_3_r>: + 80a8: f7ff bfca b\.w 8040 <__stm32l4xx_veneer_4> + +000080ac <__stm32l4xx_veneer_4_r>: + 80ac: f7ff bfd0 b\.w 8050 <__stm32l4xx_veneer_5> + +000080b0 <__stm32l4xx_veneer_5_r>: + 80b0: f7ff bfda b\.w 8068 <__stm32l4xx_veneer_6> + +000080b4 <__stm32l4xx_veneer_6_r>: + 80b4: f7ff bfe4 b\.w 8080 <__stm32l4xx_veneer_7> diff --git a/ld/testsuite/ld-arm/stm32l4xx-fix-all.s b/ld/testsuite/ld-arm/stm32l4xx-fix-all.s new file mode 100644 index 0000000000..0c1826670f --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-fix-all.s @@ -0,0 +1,22 @@ + .syntax unified + .cpu cortex-m4 + .fpu fpv4-sp-d16 + .text + .align 1 + .thumb + .thumb_func + .global _start +_start: + @ All LDM treatments for word acces <= 8 go through the same + @ replication code, but decoding may vary + ldm.w r9, {r1-r8} + ldm.w r9!, {r1-r8} + ldmdb.w r9, {r1-r8} + ldmdb.w r9!, {r1-r8} + pop {r1-r8} + + @ All VLDM treatments for word acces <= 8 go through the same + @ replication code, but decoding may vary + vldm r9, {s1-s8} + vldm r6!, {s9-s16} + vpop {s1-s8} diff --git a/ld/testsuite/ld-arm/stm32l4xx-fix-it-block.d b/ld/testsuite/ld-arm/stm32l4xx-fix-it-block.d new file mode 100644 index 0000000000..97bb34dce1 --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-fix-it-block.d @@ -0,0 +1,189 @@ + +.*: file format elf32-littlearm.* + + +Disassembly of section \.text: + +00008000 <__stm32l4xx_veneer_0>: + 8000: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8004: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8008: f000 b875 b\.w 80f6 <__stm32l4xx_veneer_0_r> + 800c: f7f0 a000 udf\.w #0 + +00008010 <__stm32l4xx_veneer_1>: + 8010: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8014: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8018: f000 b872 b\.w 8100 <__stm32l4xx_veneer_1_r> + 801c: f7f0 a000 udf\.w #0 + +00008020 <__stm32l4xx_veneer_2>: + 8020: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8024: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8028: f000 b86f b\.w 810a <__stm32l4xx_veneer_2_r> + 802c: f7f0 a000 udf\.w #0 + +00008030 <__stm32l4xx_veneer_3>: + 8030: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 
8034: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8038: f000 b86e b\.w 8118 <__stm32l4xx_veneer_3_r> + 803c: f7f0 a000 udf\.w #0 + +00008040 <__stm32l4xx_veneer_4>: + 8040: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8044: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8048: f000 b86d b\.w 8126 <__stm32l4xx_veneer_4_r> + 804c: f7f0 a000 udf\.w #0 + +00008050 <__stm32l4xx_veneer_5>: + 8050: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8054: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8058: f000 b86c b\.w 8134 <__stm32l4xx_veneer_5_r> + 805c: f7f0 a000 udf\.w #0 + +00008060 <__stm32l4xx_veneer_6>: + 8060: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8064: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8068: f000 b86b b\.w 8142 <__stm32l4xx_veneer_6_r> + 806c: f7f0 a000 udf\.w #0 + +00008070 <__stm32l4xx_veneer_7>: + 8070: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8074: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8078: f000 b86c b\.w 8154 <__stm32l4xx_veneer_7_r> + 807c: f7f0 a000 udf\.w #0 + +00008080 <__stm32l4xx_veneer_8>: + 8080: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8084: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8088: f000 b86d b\.w 8166 <__stm32l4xx_veneer_8_r> + 808c: f7f0 a000 udf\.w #0 + +00008090 <__stm32l4xx_veneer_9>: + 8090: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8094: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8098: f000 b86e b\.w 8178 <__stm32l4xx_veneer_9_r> + 809c: f7f0 a000 udf\.w #0 + +000080a0 <__stm32l4xx_veneer_a>: + 80a0: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 80a4: e899 0380 ldmia\.w r9, {r7, r8, r9} + 80a8: f000 b86f b\.w 818a <__stm32l4xx_veneer_a_r> + 80ac: f7f0 a000 udf\.w #0 + +000080b0 <__stm32l4xx_veneer_b>: + 80b0: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 80b4: e899 0380 ldmia\.w r9, {r7, r8, r9} + 80b8: f000 b870 b\.w 819c <__stm32l4xx_veneer_b_r> + 80bc: f7f0 a000 udf\.w #0 + +000080c0 <__stm32l4xx_veneer_c>: + 80c0: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 80c4: e899 0380 ldmia\.w r9, {r7, r8, r9} + 80c8: f000 b871 b\.w 81ae <__stm32l4xx_veneer_c_r> + 80cc: f7f0 a000 udf\.w #0 + +000080d0 <__stm32l4xx_veneer_d>: + 80d0: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 80d4: e899 0380 ldmia\.w r9, {r7, r8, r9} + 80d8: f000 b872 b\.w 81c0 <__stm32l4xx_veneer_d_r> + 80dc: f7f0 a000 udf\.w #0 + +000080e0 <__stm32l4xx_veneer_e>: + 80e0: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 80e4: e899 0380 ldmia\.w r9, {r7, r8, r9} + 80e8: f000 b873 b\.w 81d2 <__stm32l4xx_veneer_e_r> + 80ec: f7f0 a000 udf\.w #0 + +000080f0 <_start>: + 80f0: bf08 it eq + 80f2: f7ff bf85 beq\.w 8000 <__stm32l4xx_veneer_0> + +000080f6 <__stm32l4xx_veneer_0_r>: + 80f6: bf04 itt eq + 80f8: f3af 8000 nopeq\.w + 80fc: f7ff bf88 beq\.w 8010 <__stm32l4xx_veneer_1> + +00008100 <__stm32l4xx_veneer_1_r>: + 8100: bf0c ite eq + 8102: f3af 8000 nopeq\.w + 8106: f7ff bf8b bne\.w 8020 <__stm32l4xx_veneer_2> + +0000810a <__stm32l4xx_veneer_2_r>: + 810a: bf02 ittt eq + 810c: f3af 8000 nopeq\.w + 8110: f3af 8000 nopeq\.w + 8114: f7ff bf8c beq\.w 8030 <__stm32l4xx_veneer_3> + +00008118 <__stm32l4xx_veneer_3_r>: + 8118: bf0a itet eq + 811a: f3af 8000 nopeq\.w + 811e: f3af 8000 nopne\.w + 8122: f7ff bf8d beq\.w 8040 <__stm32l4xx_veneer_4> + +00008126 <__stm32l4xx_veneer_4_r>: + 8126: bf06 itte eq + 8128: f3af 8000 nopeq\.w + 812c: f3af 8000 nopeq\.w + 8130: f7ff bf8e bne\.w 8050 <__stm32l4xx_veneer_5> + +00008134 <__stm32l4xx_veneer_5_r>: + 8134: bf0e itee eq + 8136: f3af 8000 nopeq\.w + 813a: f3af 8000 nopne\.w + 813e: f7ff bf8f bne\.w 8060 <__stm32l4xx_veneer_6> 
+ +00008142 <__stm32l4xx_veneer_6_r>: + 8142: bf01 itttt eq + 8144: f3af 8000 nopeq\.w + 8148: f3af 8000 nopeq\.w + 814c: f3af 8000 nopeq\.w + 8150: f7ff bf8e beq\.w 8070 <__stm32l4xx_veneer_7> + +00008154 <__stm32l4xx_veneer_7_r>: + 8154: bf03 ittte eq + 8156: f3af 8000 nopeq\.w + 815a: f3af 8000 nopeq\.w + 815e: f3af 8000 nopeq\.w + 8162: f7ff bf8d bne\.w 8080 <__stm32l4xx_veneer_8> + +00008166 <__stm32l4xx_veneer_8_r>: + 8166: bf05 ittet eq + 8168: f3af 8000 nopeq\.w + 816c: f3af 8000 nopeq\.w + 8170: f3af 8000 nopne\.w + 8174: f7ff bf8c beq\.w 8090 <__stm32l4xx_veneer_9> + +00008178 <__stm32l4xx_veneer_9_r>: + 8178: bf07 ittee eq + 817a: f3af 8000 nopeq\.w + 817e: f3af 8000 nopeq\.w + 8182: f3af 8000 nopne\.w + 8186: f7ff bf8b bne\.w 80a0 <__stm32l4xx_veneer_a> + +0000818a <__stm32l4xx_veneer_a_r>: + 818a: bf09 itett eq + 818c: f3af 8000 nopeq\.w + 8190: f3af 8000 nopne\.w + 8194: f3af 8000 nopeq\.w + 8198: f7ff bf8a beq\.w 80b0 <__stm32l4xx_veneer_b> + +0000819c <__stm32l4xx_veneer_b_r>: + 819c: bf0b itete eq + 819e: f3af 8000 nopeq\.w + 81a2: f3af 8000 nopne\.w + 81a6: f3af 8000 nopeq\.w + 81aa: f7ff bf89 bne\.w 80c0 <__stm32l4xx_veneer_c> + +000081ae <__stm32l4xx_veneer_c_r>: + 81ae: bf0d iteet eq + 81b0: f3af 8000 nopeq\.w + 81b4: f3af 8000 nopne\.w + 81b8: f3af 8000 nopne\.w + 81bc: f7ff bf88 beq\.w 80d0 <__stm32l4xx_veneer_d> + +000081c0 <__stm32l4xx_veneer_d_r>: + 81c0: bf0f iteee eq + 81c2: f3af 8000 nopeq\.w + 81c6: f3af 8000 nopne\.w + 81ca: f3af 8000 nopne\.w + 81ce: f7ff bf87 bne\.w 80e0 <__stm32l4xx_veneer_e> diff --git a/ld/testsuite/ld-arm/stm32l4xx-fix-it-block.s b/ld/testsuite/ld-arm/stm32l4xx-fix-it-block.s new file mode 100644 index 0000000000..567c0b6c71 --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-fix-it-block.s @@ -0,0 +1,92 @@ + .syntax unified + .cpu cortex-m4 + .fpu fpv4-sp-d16 + .text + .align 1 + .thumb + .thumb_func + .global _start +_start: + @ Create a situation where a multiple-load that should be + @ patched belongs to an IT block in the position where it can + @ be, that is the last position in the IT block. + @ Mostly to cover the IT detection logic. + @ Tests correspond to LDM CASE #1. 
+ it eq + ldmeq.w r9, {r1-r9} + + itt eq + nop.w + ldmeq.w r9, {r1-r9} + + ite eq + nop.w + ldmne.w r9, {r1-r9} + + ittt eq + nop.w + nop.w + ldmeq.w r9, {r1-r9} + + itet eq + nop.w + nop.w + ldmeq.w r9, {r1-r9} + + itte eq + nop.w + nop.w + ldmne.w r9, {r1-r9} + + itee eq + nop.w + nop.w + ldmne.w r9, {r1-r9} + + itttt eq + nop.w + nop.w + nop.w + ldmeq.w r9, {r1-r9} + + ittte eq + nop.w + nop.w + nop.w + ldmne.w r9, {r1-r9} + + ittet eq + nop.w + nop.w + nop.w + ldmeq.w r9, {r1-r9} + + ittee eq + nop.w + nop.w + nop.w + ldmne.w r9, {r1-r9} + + itett eq + nop.w + nop.w + nop.w + ldmeq.w r9, {r1-r9} + + itete eq + nop.w + nop.w + nop.w + ldmne.w r9, {r1-r9} + + iteet eq + nop.w + nop.w + nop.w + ldmeq.w r9, {r1-r9} + + iteee eq + nop.w + nop.w + nop.w + ldmne.w r9, {r1-r9} diff --git a/ld/testsuite/ld-arm/stm32l4xx-fix-ldm.d b/ld/testsuite/ld-arm/stm32l4xx-fix-ldm.d new file mode 100644 index 0000000000..260415ddec --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-fix-ldm.d @@ -0,0 +1,174 @@ + +.*: file format elf32-littlearm.* + + +Disassembly of section \.text: + +00008000 <__stm32l4xx_veneer_0>: + 8000: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8004: e899 0380 ldmia\.w r9, {r7, r8, r9} + 8008: f000 b88c b\.w 8124 <__stm32l4xx_veneer_0_r> + 800c: f7f0 a000 udf\.w #0 + +00008010 <__stm32l4xx_veneer_1>: + 8010: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 8014: e899 8380 ldmia\.w r9, {r7, r8, r9, pc} + 8018: f7f0 a000 udf\.w #0 + 801c: f7f0 a000 udf\.w #0 + +00008020 <__stm32l4xx_veneer_2>: + 8020: 4607 mov r7, r0 + 8022: e8b7 007e ldmia\.w r7!, {r1, r2, r3, r4, r5, r6} + 8026: e897 0380 ldmia\.w r7, {r7, r8, r9} + 802a: f000 b87f b\.w 812c <__stm32l4xx_veneer_2_r> + 802e: de00 udf #0 + +00008030 <__stm32l4xx_veneer_3>: + 8030: 460f mov r7, r1 + 8032: e8b7 007e ldmia\.w r7!, {r1, r2, r3, r4, r5, r6} + 8036: e897 0380 ldmia\.w r7, {r7, r8, r9} + 803a: f000 b879 b\.w 8130 <__stm32l4xx_veneer_3_r> + 803e: de00 udf #0 + +00008040 <__stm32l4xx_veneer_4>: + 8040: 4607 mov r7, r0 + 8042: e8b7 007e ldmia\.w r7!, {r1, r2, r3, r4, r5, r6} + 8046: e897 8380 ldmia\.w r7, {r7, r8, r9, pc} + 804a: de00 udf #0 + 804c: f7f0 a000 udf\.w #0 + +00008050 <__stm32l4xx_veneer_5>: + 8050: 460f mov r7, r1 + 8052: e8b7 007e ldmia\.w r7!, {r1, r2, r3, r4, r5, r6} + 8056: e897 8380 ldmia\.w r7, {r7, r8, r9, pc} + 805a: de00 udf #0 + 805c: f7f0 a000 udf\.w #0 + +00008060 <__stm32l4xx_veneer_6>: + 8060: e8b0 007e ldmia\.w r0!, {r1, r2, r3, r4, r5, r6} + 8064: e8b0 0380 ldmia\.w r0!, {r7, r8, r9} + 8068: f000 b868 b\.w 813c <__stm32l4xx_veneer_6_r> + 806c: f7f0 a000 udf\.w #0 + +00008070 <__stm32l4xx_veneer_7>: + 8070: e8b0 007e ldmia\.w r0!, {r1, r2, r3, r4, r5, r6} + 8074: e8b0 8380 ldmia\.w r0!, {r7, r8, r9, pc} + 8078: f7f0 a000 udf\.w #0 + 807c: f7f0 a000 udf\.w #0 + +00008080 <__stm32l4xx_veneer_8>: + 8080: e931 0380 ldmdb r1!, {r7, r8, r9} + 8084: e911 007e ldmdb r1, {r1, r2, r3, r4, r5, r6} + 8088: f000 b85c b\.w 8144 <__stm32l4xx_veneer_8_r> + 808c: f7f0 a000 udf\.w #0 + +00008090 <__stm32l4xx_veneer_9>: + 8090: 4651 mov r1, sl + 8092: e931 0380 ldmdb r1!, {r7, r8, r9} + 8096: e911 007e ldmdb r1, {r1, r2, r3, r4, r5, r6} + 809a: f000 b855 b\.w 8148 <__stm32l4xx_veneer_9_r> + 809e: de00 udf #0 + +000080a0 <__stm32l4xx_veneer_a>: + 80a0: 4649 mov r1, r9 + 80a2: e931 0380 ldmdb r1!, {r7, r8, r9} + 80a6: e911 007e ldmdb r1, {r1, r2, r3, r4, r5, r6} + 80aa: f000 b84f b\.w 814c <__stm32l4xx_veneer_a_r> + 80ae: de00 udf #0 + +000080b0 <__stm32l4xx_veneer_b>: + 80b0: f1a9 0928 sub\.w r9, r9, #40 ; 0x28 
+ 80b4: e8b9 007e ldmia\.w r9!, {r1, r2, r3, r4, r5, r6} + 80b8: e899 8380 ldmia\.w r9, {r7, r8, r9, pc} + 80bc: f7f0 a000 udf\.w #0 + +000080c0 <__stm32l4xx_veneer_c>: + 80c0: f1a1 0728 sub\.w r7, r1, #40 ; 0x28 + 80c4: e8b7 007e ldmia\.w r7!, {r1, r2, r3, r4, r5, r6} + 80c8: e897 8380 ldmia\.w r7, {r7, r8, r9, pc} + 80cc: f7f0 a000 udf\.w #0 + +000080d0 <__stm32l4xx_veneer_d>: + 80d0: f1a0 0728 sub\.w r7, r0, #40 ; 0x28 + 80d4: e8b7 007e ldmia\.w r7!, {r1, r2, r3, r4, r5, r6} + 80d8: e897 8380 ldmia\.w r7, {r7, r8, r9, pc} + 80dc: f7f0 a000 udf\.w #0 + +000080e0 <__stm32l4xx_veneer_e>: + 80e0: e930 0380 ldmdb r0!, {r7, r8, r9} + 80e4: e930 007e ldmdb r0!, {r1, r2, r3, r4, r5, r6} + 80e8: f000 b838 b\.w 815c <__stm32l4xx_veneer_e_r> + 80ec: f7f0 a000 udf\.w #0 + +000080f0 <__stm32l4xx_veneer_f>: + 80f0: f1a0 0028 sub\.w r0, r0, #40 ; 0x28 + 80f4: 4607 mov r7, r0 + 80f6: e8b7 007e ldmia\.w r7!, {r1, r2, r3, r4, r5, r6} + 80fa: e897 8380 ldmia\.w r7, {r7, r8, r9, pc} + 80fe: de00 udf #0 + +00008100 <__stm32l4xx_veneer_10>: + 8100: e8bd 007f ldmia\.w sp!, {r0, r1, r2, r3, r4, r5, r6} + 8104: e8bd 0380 ldmia\.w sp!, {r7, r8, r9} + 8108: f000 b82c b\.w 8164 <__stm32l4xx_veneer_10_r> + 810c: f7f0 a000 udf\.w #0 + +00008110 <__stm32l4xx_veneer_11>: + 8110: e8bd 007f ldmia\.w sp!, {r0, r1, r2, r3, r4, r5, r6} + 8114: e8bd 8380 ldmia\.w sp!, {r7, r8, r9, pc} + 8118: f7f0 a000 udf\.w #0 + 811c: f7f0 a000 udf\.w #0 + +00008120 <_start>: + 8120: f7ff bf6e b\.w 8000 <__stm32l4xx_veneer_0> + +00008124 <__stm32l4xx_veneer_0_r>: + 8124: f7ff bf74 b\.w 8010 <__stm32l4xx_veneer_1> + +00008128 <__stm32l4xx_veneer_1_r>: + 8128: f7ff bf7a b\.w 8020 <__stm32l4xx_veneer_2> + +0000812c <__stm32l4xx_veneer_2_r>: + 812c: f7ff bf80 b\.w 8030 <__stm32l4xx_veneer_3> + +00008130 <__stm32l4xx_veneer_3_r>: + 8130: f7ff bf86 b\.w 8040 <__stm32l4xx_veneer_4> + +00008134 <__stm32l4xx_veneer_4_r>: + 8134: f7ff bf8c b\.w 8050 <__stm32l4xx_veneer_5> + +00008138 <__stm32l4xx_veneer_5_r>: + 8138: f7ff bf92 b\.w 8060 <__stm32l4xx_veneer_6> + +0000813c <__stm32l4xx_veneer_6_r>: + 813c: f7ff bf98 b\.w 8070 <__stm32l4xx_veneer_7> + +00008140 <__stm32l4xx_veneer_7_r>: + 8140: f7ff bf9e b\.w 8080 <__stm32l4xx_veneer_8> + +00008144 <__stm32l4xx_veneer_8_r>: + 8144: f7ff bfa4 b\.w 8090 <__stm32l4xx_veneer_9> + +00008148 <__stm32l4xx_veneer_9_r>: + 8148: f7ff bfaa b\.w 80a0 <__stm32l4xx_veneer_a> + +0000814c <__stm32l4xx_veneer_a_r>: + 814c: f7ff bfb0 b\.w 80b0 <__stm32l4xx_veneer_b> + +00008150 <__stm32l4xx_veneer_b_r>: + 8150: f7ff bfb6 b\.w 80c0 <__stm32l4xx_veneer_c> + +00008154 <__stm32l4xx_veneer_c_r>: + 8154: f7ff bfbc b\.w 80d0 <__stm32l4xx_veneer_d> + +00008158 <__stm32l4xx_veneer_d_r>: + 8158: f7ff bfc2 b\.w 80e0 <__stm32l4xx_veneer_e> + +0000815c <__stm32l4xx_veneer_e_r>: + 815c: f7ff bfc8 b\.w 80f0 <__stm32l4xx_veneer_f> + +00008160 <__stm32l4xx_veneer_f_r>: + 8160: f7ff bfce b\.w 8100 <__stm32l4xx_veneer_10> + +00008164 <__stm32l4xx_veneer_10_r>: + 8164: f7ff bfd4 b\.w 8110 <__stm32l4xx_veneer_11> diff --git a/ld/testsuite/ld-arm/stm32l4xx-fix-ldm.s b/ld/testsuite/ld-arm/stm32l4xx-fix-ldm.s new file mode 100644 index 0000000000..2f36c3e6f3 --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-fix-ldm.s @@ -0,0 +1,147 @@ + .syntax unified + .cpu cortex-m4 + .fpu fpv4-sp-d16 + .text + .align 1 + .thumb + .thumb_func + .global _start +_start: + @ LDM CASE #1 (used when rx is in upper_list) + @ ldm rx, {...} -> + @ ldm rx!, {lower_list} + @ ldm rx, {upper_list} + @ b.w + ldm.w r9, {r1-r9} + + @ LDM CASE #1 bis (used when rx is in 
upper_list and pc is + @ in reglist) + @ ldm rx, {...} -> + @ ldm rx!, {lower_list} + @ ldm rx, {upper_list} + ldm.w r9, {r1-r9, pc} + + @ LDM CASE #2 (used when rx is not in upper_list) + @ ldm rx, {...} -> + @ mov ry, rx where ry is the lowest register from upper_list + @ ldm ry!, {lower_list} + @ ldm ry, {upper_list} + @ b.w + ldm.w r0, {r1-r9} + + @ LDM CASE #2 bis (used when rx is in lower_list) + @ ldm rx, {...} -> + @ mov ry, rx where ry is the lowest register from upper_list + @ ldm ry!, {lower_list} + @ ldm ry, {upper_list} + @ b.w + ldm.w r1, {r1-r9} + + @ LDM CASE #2 ter (used when rx is not in upper_list and pc is + @ in reglist) + @ ldm rx, {...} -> + @ mov ry, rx where ry is the lowest register from upper_list + @ ldm ry!, {lower_list} + @ ldm ry, {upper_list} + ldm.w r0, {r1-r9, pc} + + @ LDM CASE #2 quater (used when rx is in lower_list and pc is + @ in reglist) + @ ldm rx, {...} -> + @ mov ry, rx where ry is the lowest register from upper_list + @ ldm ry!, {lower_list} + @ ldm ry, {upper_list} + ldm.w r1, {r1-r9, pc} + + @ LDM CASE #3 (used when rx is not in upper_list) + @ ldm rx, {...} -> + @ ldm rx!, {lower_list} + @ ldm rx!, {upper_list} + @ b.w + @ Write-back variants are unpredictable when rx also appears in + @ the loaded register list + ldm.w r0!, {r1-r9} + + @ LDM CASE #3 bis (used when rx is not in upper_list and pc is + @ in reglist) + @ ldm rx, {...} -> + @ ldm rx!, {lower_list} + @ ldm rx!, {upper_list} + ldm.w r0!, {r1-r9, pc} + + @ LDM CASE #4 (used when pc is not in reglist and rx is in + @ lower_list) + @ ldmdb rx, {...} -> + @ ldmdb rx!, {upper_list} + @ ldmdb rx, {lower_list} + ldmdb.w r1, {r1-r9} + + @ LDM CASE #5 (used when pc is not in reglist and rx is not in + @ lower_list) + @ This means that rx may or may not be in upper_list + @ ldmdb rx, {...} -> + @ mov ry, rx where ry is the lowest register from lower_list + @ ldmdb ry!, {upper_list} + @ ldmdb ry , {lower_list} + @ b.w + ldmdb.w sl, {r1-r9} + + @ LDM CASE #5 bis (used when pc is not in reglist and rx is in + @ upper_list) + @ ldmdb rx, {...} -> + @ mov ry, rx where ry is the lowest register from lower_list + @ ldmdb ry!, {upper_list} + @ ldmdb ry , {lower_list} + @ b.w + ldmdb.w r9, {r1-r9} + + @ LDM CASE #6 (used when pc is in reglist and rx is in + @ upper_list) + @ ldmdb rx, {...} -> + @ sub rx, rx, #size (lower_list + upper_list) + @ ldm rx!, {lower_list} + @ ldm rx, {upper_list} + @ This case reverses the load order + ldmdb.w r9, {r1-r9, pc} + + @ LDM CASE #6 bis (used when pc is in reglist and rx is in + @ lower_list) + @ ldmdb rx, {...} -> + @ sub rx, rx, #size (lower_list + upper_list) + @ ldm rx!, {lower_list} + @ ldm rx, {upper_list} + ldmdb.w r1, {r1-r9, pc} + + @ LDM CASE #7 (used when pc is in reglist and rx is not in + @ upper_list) + @ ldmdb rx, {...} -> + @ sub ry, rx, #size (lower_list + upper_list) where ry is the lowest + @ register of the upper list + @ ldm ry!, {lower_list} + @ ldm ry , {upper_list} + @ This case reverses the load order + ldmdb.w r0, {r1-r9, pc} + + @ LDM CASE #8 (used when pc is not in reglist) + @ ldmdb rx!, {...} -> + @ ldmdb rx!, {upper_list} + @ ldmdb rx!, {lower_list} + @ b.w + ldmdb.w r0!, {r1-r9} + + @ LDM CASE #9 (used when pc is in reglist) + @ ldmdb rx!, {...} -> + @ sub rx, rx, #size (lower_list + upper_list) + @ mov ry, rx where ry is the lowest register from upper_list + @ ldm ry!, {lower_list} + @ ldm ry , {upper_list} + ldmdb.w r0!, {r1-r9, pc} + + @ POP CASE #1 (list does not include pc) + @ pop {...} -> pop {lower_list} pop
{upper_list} + @ b.w + pop {r0-r9} + + @ POP CASE #2 (list includes PC) + @ pop {...} -> pop {lower_list} pop {upper_list} + pop {r0-r9, pc} diff --git a/ld/testsuite/ld-arm/stm32l4xx-fix-vldm.d b/ld/testsuite/ld-arm/stm32l4xx-fix-vldm.d new file mode 100644 index 0000000000..49d7beeecd --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-fix-vldm.d @@ -0,0 +1,49 @@ + +.*: file format elf32-littlearm.* + + +Disassembly of section \.text: + +00008000 <__stm32l4xx_veneer_0>: + 8000: ecf9 0a08 vldmia r9!, {s1-s8} + 8004: ecf9 4a08 vldmia r9!, {s9-s16} + 8008: ecf9 8a08 vldmia r9!, {s17-s24} + 800c: ecf9 ca07 vldmia r9!, {s25-s31} + 8010: f1a9 097c sub\.w r9, r9, #124 ; 0x7c + 8014: f000 b826 b\.w 8064 <__stm32l4xx_veneer_0_r> + +00008018 <__stm32l4xx_veneer_1>: + 8018: ecf6 4a08 vldmia r6!, {s9-s16} + 801c: ecf6 8a08 vldmia r6!, {s17-s24} + 8020: ecf6 ca05 vldmia r6!, {s25-s29} + 8024: f000 b820 b\.w 8068 <__stm32l4xx_veneer_1_r> + 8028: f7f0 a000 udf\.w #0 + 802c: f7f0 a000 udf\.w #0 + +00008030 <__stm32l4xx_veneer_2>: + 8030: ecfd 0a08 vpop {s1-s8} + 8034: ecfd 4a01 vpop {s9} + 8038: f000 b818 b\.w 806c <__stm32l4xx_veneer_2_r> + 803c: f7f0 a000 udf\.w #0 + 8040: f7f0 a000 udf\.w #0 + 8044: f7f0 a000 udf\.w #0 + +00008048 <__stm32l4xx_veneer_3>: + 8048: ed7b 0a08 vldmdb fp!, {s1-s8} + 804c: ed7b 4a08 vldmdb fp!, {s9-s16} + 8050: ed7b 8a08 vldmdb fp!, {s17-s24} + 8054: ed7b ca07 vldmdb fp!, {s25-s31} + 8058: f000 b80a b\.w 8070 <__stm32l4xx_veneer_3_r> + 805c: f7f0 a000 udf\.w #0 + +00008060 <_start>: + 8060: f7ff bfce b\.w 8000 <__stm32l4xx_veneer_0> + +00008064 <__stm32l4xx_veneer_0_r>: + 8064: f7ff bfd8 b\.w 8018 <__stm32l4xx_veneer_1> + +00008068 <__stm32l4xx_veneer_1_r>: + 8068: f7ff bfe2 b\.w 8030 <__stm32l4xx_veneer_2> + +0000806c <__stm32l4xx_veneer_2_r>: + 806c: f7ff bfec b\.w 8048 <__stm32l4xx_veneer_3> diff --git a/ld/testsuite/ld-arm/stm32l4xx-fix-vldm.s b/ld/testsuite/ld-arm/stm32l4xx-fix-vldm.s new file mode 100644 index 0000000000..94aa66e6d3 --- /dev/null +++ b/ld/testsuite/ld-arm/stm32l4xx-fix-vldm.s @@ -0,0 +1,26 @@ + .syntax unified + .cpu cortex-m4 + .fpu fpv4-sp-d16 + .text + .align 1 + .thumb + .thumb_func + .global _start +_start: + @ VLDM CASE #1 + @ vldm rx, {...} + @ -> vldm rx!, {8_words_or_less} for each + @ -> sub rx, rx, #size (list) + vldm r9, {s1-s31} + + @ VLDM CASE #2 + @ vldm rx!, {...} + @ -> vldm rx!, {8_words_or_less} for each needed 8_word + @ This also handles vpop instruction (when rx is sp) + vldm r6!, {s9-s29} + @ Explicit VPOP test + vpop {s1-s9} + + @ vldmd rx!, {...} + @ -> vldmb rx!, {8_words_or_less} for each needed 8_word + vldmdb r11!, {s1-s31}
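The comments in stm32l4xx-fix-vldm.s above describe the VLDM splitting pattern only in outline. The following stand-alone sketch (illustrative only, not the linker's code; the function name is invented) prints the chunked sequence the expected dumps show: at most 8 words per VLDM, with a SUB restoring the base register when the original instruction had no write-back.

#include <stdio.h>

/* Print a chunked replacement for a VLDM of NWORDS consecutive
   single-precision registers starting at s<FIRST>, using base
   register r<BASE>.  Each chunk loads at most 8 words; without
   write-back the base register is restored afterwards.  */
static void
emit_vldm_chunks (unsigned int base, unsigned int first,
                  unsigned int nwords, int writeback)
{
  unsigned int total = nwords;

  while (nwords > 0)
    {
      unsigned int chunk = nwords > 8 ? 8 : nwords;

      printf ("vldmia r%u!, {s%u-s%u}\n", base, first, first + chunk - 1);
      first += chunk;
      nwords -= chunk;
    }
  if (!writeback)
    printf ("sub r%u, r%u, #%u\n", base, base, total * 4);
}

int
main (void)
{
  /* Mirrors `vldm r9, {s1-s31}' above: chunks of 8+8+8+7 words,
     then `sub r9, r9, #124', as in __stm32l4xx_veneer_0.  */
  emit_vldm_chunks (9, 1, 31, 0);
  return 0;
}

The branch back to the original code and the UDF padding that fill the fixed-size veneer, visible in the expected dumps, are omitted from this sketch.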