x86: Improve -malign-branch
According to the Intel SDM, not all compare flag-modifying instructions are macro-fusible with subsequent jcc instructions. For those non-fusible instructions, -malign-branch doesn't need to align them, only jcc itself needs to be aligned. Here are 2 restrictions which separate macro-fusible instructions from non-fusible ones. Restriction 1: If TEST/AND/CMP/ADD/SUB/INC/DEC is one of the following formats: cmp m, imm add m, imm sub m, imm test m, imm and m, imm inc m dec m it is unfusible with any jcc instruction. Restriction 2: /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture Note it also works for Skylake and Cascadelake. --------------------------------------------------------------------- | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND | | ------ | ----------- | ------- | -------- | | Jo | N | N | Y | | Jno | N | N | Y | | Jc/Jb | Y | N | Y | | Jae/Jnb | Y | N | Y | | Je/Jz | Y | Y | Y | | Jne/Jnz | Y | Y | Y | | Jna/Jbe | Y | N | Y | | Ja/Jnbe | Y | N | Y | | Js | N | N | Y | | Jns | N | N | Y | | Jp/Jpe | N | N | Y | | Jnp/Jpo | N | N | Y | | Jl/Jnge | Y | Y | Y | | Jge/Jnl | Y | Y | Y | | Jle/Jng | Y | Y | Y | | Jg/Jnle | Y | Y | Y | Update maybe_fused_with_jcc_p to check if operands of CMP like instructions can be fused with conditional jump. * gas/config/tc-i386.h (i386_tc_frag_data): Add member mf_type. (TC_FRAG_INIT): Init mf_type. * gas/config/tc-i386.c (enum mf_jcc_kind): New enum. (enum mf_cmp_kind): Ditto. (maybe_fused_with_jcc_p): Add argument mf_cmp_p to get mf_type of corresponding instructions, exclude unfusible instructions. (add_fused_jcc_padding_frag_p): Likewise. (add_branch_padding_frag_p): Likewise. (output_insn): Record mf_type for corresponding instructions. (i386_macro_fusible_p): New function. (i386_next_fusible_jcc_frag): Rename from i386_next_jcc_frag, add argument cmp_fragP to return next fusible jcc frag only. (i386_classify_machine_dependent_frag): Separate macro-fusible instructions from conditional jump. 
* gas/testsuite/gas/i386/align-branch-9.s: New file. * gas/testsuite/gas/i386/align-branch-9.d: Ditto. * gas/testsuite/gas/i386/x86-64-align-branch-9.s: Ditto. * gas/testsuite/gas/i386/x86-64-align-branch-9.d: Ditto. * gas/testsuite/gas/i386/i386.exp: Run new tests.
This commit is contained in:
parent
bb7b70ab85
commit
79d72f45a7
|
@ -687,6 +687,27 @@ static unsigned int align_branch = (align_branch_jcc_bit
|
|||
| align_branch_fused_bit
|
||||
| align_branch_jmp_bit);
|
||||
|
||||
/* Types of condition jump used by macro-fusion. */
|
||||
enum mf_jcc_kind
|
||||
{
|
||||
mf_jcc_jo = 0, /* base opcode 0x70 */
|
||||
mf_jcc_jc, /* base opcode 0x72 */
|
||||
mf_jcc_je, /* base opcode 0x74 */
|
||||
mf_jcc_jna, /* base opcode 0x76 */
|
||||
mf_jcc_js, /* base opcode 0x78 */
|
||||
mf_jcc_jp, /* base opcode 0x7a */
|
||||
mf_jcc_jl, /* base opcode 0x7c */
|
||||
mf_jcc_jle, /* base opcode 0x7e */
|
||||
};
|
||||
|
||||
/* Types of compare flag-modifying instructions used by macro-fusion. */
|
||||
enum mf_cmp_kind
|
||||
{
|
||||
mf_cmp_test_and, /* test/and */
|
||||
mf_cmp_alu_cmp, /* add/sub/cmp */
|
||||
mf_cmp_incdec /* inc/dec */
|
||||
};
|
||||
|
||||
/* The maximum padding size for fused jcc. CMP like instruction can
|
||||
be 9 bytes and jcc can be 6 bytes. Leave room just in case for
|
||||
prefixes. */
|
||||
|
@ -8374,10 +8395,22 @@ encoding_length (const fragS *start_frag, offsetT start_off,
|
|||
}
|
||||
|
||||
/* Return 1 for test, and, cmp, add, sub, inc and dec which may
|
||||
be macro-fused with conditional jumps. */
|
||||
be macro-fused with conditional jumps.
|
||||
NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
|
||||
or is one of the following format:
|
||||
|
||||
cmp m, imm
|
||||
add m, imm
|
||||
sub m, imm
|
||||
test m, imm
|
||||
and m, imm
|
||||
inc m
|
||||
dec m
|
||||
|
||||
it is unfusible. */
|
||||
|
||||
static int
|
||||
maybe_fused_with_jcc_p (void)
|
||||
maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
|
||||
{
|
||||
/* No RIP address. */
|
||||
if (i.base_reg && i.base_reg->reg_num == RegIP)
|
||||
|
@ -8387,36 +8420,54 @@ maybe_fused_with_jcc_p (void)
|
|||
if (is_any_vex_encoding (&i.tm))
|
||||
return 0;
|
||||
|
||||
/* and, add, sub with destination register. */
|
||||
if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
|
||||
|| i.tm.base_opcode <= 5
|
||||
/* add, sub without add/sub m, imm. */
|
||||
if (i.tm.base_opcode <= 5
|
||||
|| (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
|
||||
|| ((i.tm.base_opcode | 3) == 0x83
|
||||
&& ((i.tm.extension_opcode | 1) == 0x5
|
||||
&& (i.tm.extension_opcode == 0x5
|
||||
|| i.tm.extension_opcode == 0x0)))
|
||||
return (i.types[1].bitfield.class == Reg
|
||||
|| i.types[1].bitfield.instance == Accum);
|
||||
{
|
||||
*mf_cmp_p = mf_cmp_alu_cmp;
|
||||
return !(i.mem_operands && i.imm_operands);
|
||||
}
|
||||
|
||||
/* test, cmp with any register. */
|
||||
/* and without and m, imm. */
|
||||
if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
|
||||
|| ((i.tm.base_opcode | 3) == 0x83
|
||||
&& i.tm.extension_opcode == 0x4))
|
||||
{
|
||||
*mf_cmp_p = mf_cmp_test_and;
|
||||
return !(i.mem_operands && i.imm_operands);
|
||||
}
|
||||
|
||||
/* test without test m, imm. */
|
||||
if ((i.tm.base_opcode | 1) == 0x85
|
||||
|| (i.tm.base_opcode | 1) == 0xa9
|
||||
|| ((i.tm.base_opcode | 1) == 0xf7
|
||||
&& i.tm.extension_opcode == 0)
|
||||
|| (i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
|
||||
&& i.tm.extension_opcode == 0))
|
||||
{
|
||||
*mf_cmp_p = mf_cmp_test_and;
|
||||
return !(i.mem_operands && i.imm_operands);
|
||||
}
|
||||
|
||||
/* cmp without cmp m, imm. */
|
||||
if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
|
||||
|| ((i.tm.base_opcode | 3) == 0x83
|
||||
&& (i.tm.extension_opcode == 0x7)))
|
||||
return (i.types[0].bitfield.class == Reg
|
||||
|| i.types[0].bitfield.instance == Accum
|
||||
|| i.types[1].bitfield.class == Reg
|
||||
|| i.types[1].bitfield.instance == Accum);
|
||||
{
|
||||
*mf_cmp_p = mf_cmp_alu_cmp;
|
||||
return !(i.mem_operands && i.imm_operands);
|
||||
}
|
||||
|
||||
/* inc, dec with any register. */
|
||||
/* inc, dec without inc/dec m. */
|
||||
if ((i.tm.cpu_flags.bitfield.cpuno64
|
||||
&& (i.tm.base_opcode | 0xf) == 0x4f)
|
||||
|| ((i.tm.base_opcode | 1) == 0xff
|
||||
&& i.tm.extension_opcode <= 0x1))
|
||||
return (i.types[0].bitfield.class == Reg
|
||||
|| i.types[0].bitfield.instance == Accum);
|
||||
{
|
||||
*mf_cmp_p = mf_cmp_incdec;
|
||||
return !i.mem_operands;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -8424,7 +8475,7 @@ maybe_fused_with_jcc_p (void)
|
|||
/* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
|
||||
|
||||
static int
|
||||
add_fused_jcc_padding_frag_p (void)
|
||||
add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
|
||||
{
|
||||
/* NB: Don't work with COND_JUMP86 without i386. */
|
||||
if (!align_branch_power
|
||||
|
@ -8433,7 +8484,7 @@ add_fused_jcc_padding_frag_p (void)
|
|||
|| !(align_branch & align_branch_fused_bit))
|
||||
return 0;
|
||||
|
||||
if (maybe_fused_with_jcc_p ())
|
||||
if (maybe_fused_with_jcc_p (mf_cmp_p))
|
||||
{
|
||||
if (last_insn.kind == last_insn_other
|
||||
|| last_insn.seg != now_seg)
|
||||
|
@ -8481,7 +8532,8 @@ add_branch_prefix_frag_p (void)
|
|||
/* Return 1 if a BRANCH_PADDING frag should be generated. */
|
||||
|
||||
static int
|
||||
add_branch_padding_frag_p (enum align_branch_kind *branch_p)
|
||||
add_branch_padding_frag_p (enum align_branch_kind *branch_p,
|
||||
enum mf_jcc_kind *mf_jcc_p)
|
||||
{
|
||||
int add_padding;
|
||||
|
||||
|
@ -8503,6 +8555,9 @@ add_branch_padding_frag_p (enum align_branch_kind *branch_p)
|
|||
}
|
||||
else
|
||||
{
|
||||
/* Because J<cc> and JN<cc> share same group in macro-fusible table,
|
||||
ignore the lowest bit. */
|
||||
*mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
|
||||
*branch_p = align_branch_jcc;
|
||||
if ((align_branch & align_branch_jcc_bit))
|
||||
add_padding = 1;
|
||||
|
@ -8573,6 +8628,10 @@ output_insn (void)
|
|||
offsetT insn_start_off;
|
||||
fragS *fragP = NULL;
|
||||
enum align_branch_kind branch = align_branch_none;
|
||||
/* The initializer is arbitrary just to avoid uninitialized error.
|
||||
It is actually either assigned in add_branch_padding_frag_p
|
||||
or never used. */
|
||||
enum mf_jcc_kind mf_jcc = mf_jcc_jo;
|
||||
|
||||
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
|
||||
if (IS_ELF && x86_used_note)
|
||||
|
@ -8665,7 +8724,7 @@ output_insn (void)
|
|||
insn_start_frag = frag_now;
|
||||
insn_start_off = frag_now_fix ();
|
||||
|
||||
if (add_branch_padding_frag_p (&branch))
|
||||
if (add_branch_padding_frag_p (&branch, &mf_jcc))
|
||||
{
|
||||
char *p;
|
||||
/* Branch can be 8 bytes. Leave some room for prefixes. */
|
||||
|
@ -8686,6 +8745,7 @@ output_insn (void)
|
|||
ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
|
||||
NULL, 0, p);
|
||||
|
||||
fragP->tc_frag_data.mf_type = mf_jcc;
|
||||
fragP->tc_frag_data.branch_type = branch;
|
||||
fragP->tc_frag_data.max_bytes = max_branch_padding_size;
|
||||
}
|
||||
|
@ -8705,6 +8765,7 @@ output_insn (void)
|
|||
unsigned char *q;
|
||||
unsigned int j;
|
||||
unsigned int prefix;
|
||||
enum mf_cmp_kind mf_cmp;
|
||||
|
||||
if (avoid_fence
|
||||
&& (i.tm.base_opcode == 0xfaee8
|
||||
|
@ -8731,7 +8792,7 @@ output_insn (void)
|
|||
if (branch)
|
||||
/* Skip if this is a branch. */
|
||||
;
|
||||
else if (add_fused_jcc_padding_frag_p ())
|
||||
else if (add_fused_jcc_padding_frag_p (&mf_cmp))
|
||||
{
|
||||
/* Make room for padding. */
|
||||
frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
|
||||
|
@ -8743,6 +8804,7 @@ output_insn (void)
|
|||
ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
|
||||
NULL, 0, p);
|
||||
|
||||
fragP->tc_frag_data.mf_type = mf_cmp;
|
||||
fragP->tc_frag_data.branch_type = align_branch_fused;
|
||||
fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
|
||||
}
|
||||
|
@ -10948,6 +11010,42 @@ elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
|
||||
Note it also works for Skylake and Cascadelake.
|
||||
---------------------------------------------------------------------
|
||||
| JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
|
||||
| ------ | ----------- | ------- | -------- |
|
||||
| Jo | N | N | Y |
|
||||
| Jno | N | N | Y |
|
||||
| Jc/Jb | Y | N | Y |
|
||||
| Jae/Jnb | Y | N | Y |
|
||||
| Je/Jz | Y | Y | Y |
|
||||
| Jne/Jnz | Y | Y | Y |
|
||||
| Jna/Jbe | Y | N | Y |
|
||||
| Ja/Jnbe | Y | N | Y |
|
||||
| Js | N | N | Y |
|
||||
| Jns | N | N | Y |
|
||||
| Jp/Jpe | N | N | Y |
|
||||
| Jnp/Jpo | N | N | Y |
|
||||
| Jl/Jnge | Y | Y | Y |
|
||||
| Jge/Jnl | Y | Y | Y |
|
||||
| Jle/Jng | Y | Y | Y |
|
||||
| Jg/Jnle | Y | Y | Y |
|
||||
--------------------------------------------------------------------- */
|
||||
static int
|
||||
i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
|
||||
{
|
||||
if (mf_cmp == mf_cmp_alu_cmp)
|
||||
return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
|
||||
|| mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
|
||||
if (mf_cmp == mf_cmp_incdec)
|
||||
return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
|
||||
|| mf_jcc == mf_jcc_jle);
|
||||
if (mf_cmp == mf_cmp_test_and)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return the next non-empty frag. */
|
||||
|
||||
static fragS *
|
||||
|
@ -10967,20 +11065,23 @@ i386_next_non_empty_frag (fragS *fragP)
|
|||
/* Return the next jcc frag after BRANCH_PADDING. */
|
||||
|
||||
static fragS *
|
||||
i386_next_jcc_frag (fragS *fragP)
|
||||
i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
|
||||
{
|
||||
if (!fragP)
|
||||
fragS *branch_fragP;
|
||||
if (!pad_fragP)
|
||||
return NULL;
|
||||
|
||||
if (fragP->fr_type == rs_machine_dependent
|
||||
&& (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
|
||||
if (pad_fragP->fr_type == rs_machine_dependent
|
||||
&& (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
|
||||
== BRANCH_PADDING))
|
||||
{
|
||||
fragP = i386_next_non_empty_frag (fragP);
|
||||
if (fragP->fr_type != rs_machine_dependent)
|
||||
branch_fragP = i386_next_non_empty_frag (pad_fragP);
|
||||
if (branch_fragP->fr_type != rs_machine_dependent)
|
||||
return NULL;
|
||||
if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == COND_JUMP)
|
||||
return fragP;
|
||||
if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
|
||||
&& i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
|
||||
pad_fragP->tc_frag_data.mf_type))
|
||||
return branch_fragP;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
@ -11025,7 +11126,7 @@ i386_classify_machine_dependent_frag (fragS *fragP)
|
|||
*/
|
||||
cmp_fragP = i386_next_non_empty_frag (next_fragP);
|
||||
pad_fragP = i386_next_non_empty_frag (cmp_fragP);
|
||||
branch_fragP = i386_next_jcc_frag (pad_fragP);
|
||||
branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
|
||||
if (branch_fragP)
|
||||
{
|
||||
/* The BRANCH_PADDING frag is merged with the
|
||||
|
|
|
@ -273,6 +273,7 @@ struct i386_tc_frag_data
|
|||
unsigned char prefix_length;
|
||||
unsigned char default_prefix;
|
||||
unsigned char cmp_size;
|
||||
unsigned int mf_type : 3;
|
||||
unsigned int classified : 1;
|
||||
unsigned int branch_type : 3;
|
||||
};
|
||||
|
@ -299,6 +300,7 @@ struct i386_tc_frag_data
|
|||
(FRAGP)->tc_frag_data.cmp_size = 0; \
|
||||
(FRAGP)->tc_frag_data.classified = 0; \
|
||||
(FRAGP)->tc_frag_data.branch_type = 0; \
|
||||
(FRAGP)->tc_frag_data.mf_type = 0; \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
#as: -mbranches-within-32B-boundaries
|
||||
#objdump: -dw
|
||||
|
||||
.*: +file format .*
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
0+ <foo>:
|
||||
0: 65 a3 01 00 00 00 mov %eax,%gs:0x1
|
||||
6: 55 push %ebp
|
||||
7: 55 push %ebp
|
||||
8: 55 push %ebp
|
||||
9: 55 push %ebp
|
||||
a: 89 e5 mov %esp,%ebp
|
||||
c: 89 7d f8 mov %edi,-0x8\(%ebp\)
|
||||
f: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
12: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
15: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
18: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
1b: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
1e: 39 c5 cmp %eax,%ebp
|
||||
20: 70 62 jo 84 <foo\+0x84>
|
||||
22: 89 73 f4 mov %esi,-0xc\(%ebx\)
|
||||
25: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
28: 89 7d f8 mov %edi,-0x8\(%ebp\)
|
||||
2b: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
2e: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
31: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
34: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
37: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
3a: 5d pop %ebp
|
||||
3b: 5d pop %ebp
|
||||
3c: 5d pop %ebp
|
||||
3d: 74 45 je 84 <foo\+0x84>
|
||||
3f: 5d pop %ebp
|
||||
40: 74 42 je 84 <foo\+0x84>
|
||||
42: 89 44 24 fc mov %eax,-0x4\(%esp\)
|
||||
46: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
49: 89 7d f8 mov %edi,-0x8\(%ebp\)
|
||||
4c: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
4f: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
52: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
55: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
58: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
5b: 5d pop %ebp
|
||||
5c: eb 2c jmp 8a <foo\+0x8a>
|
||||
5e: 66 90 xchg %ax,%ax
|
||||
60: eb 28 jmp 8a <foo\+0x8a>
|
||||
62: eb 26 jmp 8a <foo\+0x8a>
|
||||
64: 89 45 fc mov %eax,-0x4\(%ebp\)
|
||||
67: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
6a: 89 7d f8 mov %edi,-0x8\(%ebp\)
|
||||
6d: 5d pop %ebp
|
||||
6e: 5d pop %ebp
|
||||
6f: 40 inc %eax
|
||||
70: 72 12 jb 84 <foo\+0x84>
|
||||
72: 36 36 89 45 fc ss mov %eax,%ss:-0x4\(%ebp\)
|
||||
77: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
7a: 89 7d f8 mov %edi,-0x8\(%ebp\)
|
||||
7d: 89 75 f4 mov %esi,-0xc\(%ebp\)
|
||||
80: 21 c3 and %eax,%ebx
|
||||
82: 7c 06 jl 8a <foo\+0x8a>
|
||||
84: 8b 45 f4 mov -0xc\(%ebp\),%eax
|
||||
87: 89 45 fc mov %eax,-0x4\(%ebp\)
|
||||
8a: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
|
||||
90: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
|
||||
96: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
|
||||
9c: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
|
||||
a2: 89 75 0c mov %esi,0xc\(%ebp\)
|
||||
a5: e9 fc ff ff ff jmp a6 <foo\+0xa6>
|
||||
aa: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
|
||||
b0: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
|
||||
b6: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
|
||||
bc: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
|
||||
c2: 89 75 00 mov %esi,0x0\(%ebp\)
|
||||
c5: 74 c3 je 8a <foo\+0x8a>
|
||||
c7: 74 c1 je 8a <foo\+0x8a>
|
||||
#pass
|
|
@ -0,0 +1,74 @@
|
|||
.text
|
||||
.globl foo
|
||||
.p2align 4
|
||||
foo:
|
||||
movl %eax, %gs:0x1
|
||||
pushl %ebp
|
||||
pushl %ebp
|
||||
pushl %ebp
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
movl %edi, -8(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
cmp %eax, %ebp
|
||||
jo .L_2
|
||||
movl %esi, -12(%ebx)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %edi, -8(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
popl %ebp
|
||||
popl %ebp
|
||||
popl %ebp
|
||||
je .L_2
|
||||
popl %ebp
|
||||
je .L_2
|
||||
movl %eax, -4(%esp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %edi, -8(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
popl %ebp
|
||||
jmp .L_3
|
||||
jmp .L_3
|
||||
jmp .L_3
|
||||
movl %eax, -4(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %edi, -8(%ebp)
|
||||
popl %ebp
|
||||
popl %ebp
|
||||
inc %eax
|
||||
jc .L_2
|
||||
movl %eax, -4(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
movl %edi, -8(%ebp)
|
||||
movl %esi, -12(%ebp)
|
||||
and %eax, %ebx
|
||||
jl .L_3
|
||||
.L_2:
|
||||
movl -12(%ebp), %eax
|
||||
movl %eax, -4(%ebp)
|
||||
.L_3:
|
||||
movl %esi, -1200(%ebp)
|
||||
movl %esi, -1200(%ebp)
|
||||
movl %esi, -1200(%ebp)
|
||||
movl %esi, -1200(%ebp)
|
||||
movl %esi, 12(%ebp)
|
||||
jmp bar
|
||||
movl %esi, -1200(%ebp)
|
||||
movl %esi, -1200(%ebp)
|
||||
movl %esi, -1200(%ebp)
|
||||
movl %esi, -1200(%ebp)
|
||||
movl %esi, (%ebp)
|
||||
je .L_3
|
||||
je .L_3
|
|
@ -525,6 +525,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]]
|
|||
run_dump_test "align-branch-6"
|
||||
run_dump_test "align-branch-7"
|
||||
run_dump_test "align-branch-8"
|
||||
run_dump_test "align-branch-9"
|
||||
|
||||
# These tests require support for 8 and 16 bit relocs,
|
||||
# so we only run them for ELF and COFF targets.
|
||||
|
@ -1100,6 +1101,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t
|
|||
run_dump_test "x86-64-align-branch-6"
|
||||
run_dump_test "x86-64-align-branch-7"
|
||||
run_dump_test "x86-64-align-branch-8"
|
||||
run_dump_test "x86-64-align-branch-9"
|
||||
|
||||
if { ![istarget "*-*-aix*"]
|
||||
&& ![istarget "*-*-beos*"]
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
#as: -mbranches-within-32B-boundaries
|
||||
#objdump: -dw
|
||||
|
||||
.*: +file format .*
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
0+ <foo>:
|
||||
0: c1 e9 02 shr \$0x2,%ecx
|
||||
3: c1 e9 02 shr \$0x2,%ecx
|
||||
6: c1 e9 02 shr \$0x2,%ecx
|
||||
9: 89 d1 mov %edx,%ecx
|
||||
b: 31 c0 xor %eax,%eax
|
||||
d: c1 e9 02 shr \$0x2,%ecx
|
||||
10: c1 e9 02 shr \$0x2,%ecx
|
||||
13: c1 e9 02 shr \$0x2,%ecx
|
||||
16: c1 e9 02 shr \$0x2,%ecx
|
||||
19: c1 e9 02 shr \$0x2,%ecx
|
||||
1c: c1 e9 02 shr \$0x2,%ecx
|
||||
1f: 80 fa 02 cmp \$0x2,%dl
|
||||
22: 70 df jo 3 <foo\+0x3>
|
||||
24: 2e 2e 2e 2e 31 c0 cs cs cs cs xor %eax,%eax
|
||||
2a: c1 e9 02 shr \$0x2,%ecx
|
||||
2d: c1 e9 02 shr \$0x2,%ecx
|
||||
30: c1 e9 02 shr \$0x2,%ecx
|
||||
33: 89 d1 mov %edx,%ecx
|
||||
35: 31 c0 xor %eax,%eax
|
||||
37: c1 e9 02 shr \$0x2,%ecx
|
||||
3a: c1 e9 02 shr \$0x2,%ecx
|
||||
3d: c1 e9 02 shr \$0x2,%ecx
|
||||
40: f6 c2 02 test \$0x2,%dl
|
||||
43: 75 e8 jne 2d <foo\+0x2d>
|
||||
45: 31 c0 xor %eax,%eax
|
||||
47: c1 e9 02 shr \$0x2,%ecx
|
||||
4a: c1 e9 02 shr \$0x2,%ecx
|
||||
4d: 89 d1 mov %edx,%ecx
|
||||
4f: c1 e9 02 shr \$0x2,%ecx
|
||||
52: c1 e9 02 shr \$0x2,%ecx
|
||||
55: 89 d1 mov %edx,%ecx
|
||||
57: c1 e9 02 shr \$0x2,%ecx
|
||||
5a: 89 d1 mov %edx,%ecx
|
||||
5c: 31 c0 xor %eax,%eax
|
||||
5e: ff c0 inc %eax
|
||||
60: 76 cb jbe 2d <foo\+0x2d>
|
||||
62: 31 c0 xor %eax,%eax
|
||||
#pass
|
|
@ -0,0 +1,43 @@
|
|||
.text
|
||||
.p2align 4,,15
|
||||
foo:
|
||||
shrl $2, %ecx
|
||||
.L1:
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
movl %edx, %ecx
|
||||
xorl %eax, %eax
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
cmpb $2, %dl
|
||||
jo .L1
|
||||
xorl %eax, %eax
|
||||
shrl $2, %ecx
|
||||
.L2:
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
movl %edx, %ecx
|
||||
xorl %eax, %eax
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
testb $2, %dl
|
||||
jne .L2
|
||||
xorl %eax, %eax
|
||||
.L3:
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
movl %edx, %ecx
|
||||
shrl $2, %ecx
|
||||
shrl $2, %ecx
|
||||
movl %edx, %ecx
|
||||
shrl $2, %ecx
|
||||
movl %edx, %ecx
|
||||
xorl %eax, %eax
|
||||
inc %eax
|
||||
jbe .L2
|
||||
xorl %eax, %eax
|
Loading…
Reference in New Issue