x86: Improve -malign-branch

According to the Intel SDM, not all compare-like flag-modifying instructions
are macro-fusible with a subsequent jcc instruction. For those non-fusible
instructions, -malign-branch doesn't need to align them; only the jcc itself
needs to be aligned.

Here are the 2 restrictions that separate macro-fusible instructions from
non-fusible ones:

Restriction 1:
If TEST/AND/CMP/ADD/SUB/INC/DEC is in one of the following formats:

   cmp m, imm
   add m, imm
   sub m, imm
   test m, imm
   and m, imm
   inc m
   dec m

it is unfusible with any jcc instruction.

Restriction 2:

/* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
   Note it also works for Skylake and Cascadelake.
---------------------------------------------------------------------
|   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
| ------  | ----------- | ------- | -------- |
|   Jo    |      N      |    N    |     Y    |
|   Jno   |      N      |    N    |     Y    |
|  Jc/Jb  |      Y      |    N    |     Y    |
| Jae/Jnb |      Y      |    N    |     Y    |
|  Je/Jz  |      Y      |    Y    |     Y    |
| Jne/Jnz |      Y      |    Y    |     Y    |
| Jna/Jbe |      Y      |    N    |     Y    |
| Ja/Jnbe |      Y      |    N    |     Y    |
|   Js    |      N      |    N    |     Y    |
|   Jns   |      N      |    N    |     Y    |
|  Jp/Jpe |      N      |    N    |     Y    |
| Jnp/Jpo |      N      |    N    |     Y    |
| Jl/Jnge |      Y      |    Y    |     Y    |
| Jge/Jnl |      Y      |    Y    |     Y    |
| Jle/Jng |      Y      |    Y    |     Y    |
| Jg/Jnle |      Y      |    Y    |     Y    |

Update maybe_fused_with_jcc_p to check whether the operands of CMP-like
instructions allow them to be fused with a conditional jump.

	* gas/config/tc-i386.h (i386_tc_frag_data): Add member mf_type.
	(TC_FRAG_INIT): Init mf_type.
	* gas/config/tc-i386.c (enum mf_jcc_kind): New enum.
	(enum mf_cmp_kind): Ditto.
	(maybe_fused_with_jcc_p): Add argument mf_cmp_p to get
	mf_type of corresponding instructions, exclude unfusible
	instructions.
	(add_fused_jcc_padding_frag_p): Likewise.
	(add_branch_padding_frag_p): Likewise.
	(output_insn): Record mf_type for corresponding instructions.
	(i386_macro_fusible_p): New function.
	(i386_next_fusible_jcc_frag): Rename from i386_next_jcc_frag,
	add argument cmp_fragP to return next fusible jcc frag only.
	(i386_classify_machine_dependent_frag): Separate macro-fusible
	instructions from conditional jumps.
	* gas/testsuite/gas/i386/align-branch-9.s: New file.
	* gas/testsuite/gas/i386/align-branch-9.d: Ditto.
	* gas/testsuite/gas/i386/x86-64-align-branch-9.s: Ditto.
	* gas/testsuite/gas/i386/x86-64-align-branch-9.d: Ditto.
	* gas/testsuite/gas/i386/i386.exp: Run new tests.
This commit is contained in:
Hongtao Liu 2020-03-03 06:21:37 -08:00 committed by H.J. Lu
parent bb7b70ab85
commit 79d72f45a7
7 changed files with 378 additions and 32 deletions

View File

@ -687,6 +687,27 @@ static unsigned int align_branch = (align_branch_jcc_bit
| align_branch_fused_bit
| align_branch_jmp_bit);
/* Kinds of conditional jump distinguished for macro-fusion.  The
   value of each enumerator equals bits 1-3 of the jump's base opcode,
   so a J<cc>/JN<cc> pair (which differs only in the lowest opcode bit)
   maps to the same kind.  The opcode shown is the even member of the
   pair.  */
enum mf_jcc_kind
{
  mf_jcc_jo  = 0,	/* base opcode 0x70 */
  mf_jcc_jc  = 1,	/* base opcode 0x72 */
  mf_jcc_je  = 2,	/* base opcode 0x74 */
  mf_jcc_jna = 3,	/* base opcode 0x76 */
  mf_jcc_js  = 4,	/* base opcode 0x78 */
  mf_jcc_jp  = 5,	/* base opcode 0x7a */
  mf_jcc_jl  = 6,	/* base opcode 0x7c */
  mf_jcc_jle = 7	/* base opcode 0x7e */
};
/* Kinds of compare-like, flag-modifying instructions distinguished for
   macro-fusion.  Each kind corresponds to one column of the
   macro-fusible instruction table.  */
enum mf_cmp_kind
{
  mf_cmp_test_and = 0,	/* test/and */
  mf_cmp_alu_cmp  = 1,	/* add/sub/cmp */
  mf_cmp_incdec   = 2	/* inc/dec */
};
/* The maximum padding size for fused jcc. CMP like instruction can
be 9 bytes and jcc can be 6 bytes. Leave room just in case for
prefixes. */
@ -8374,10 +8395,22 @@ encoding_length (const fragS *start_frag, offsetT start_off,
}
/* Return 1 for test, and, cmp, add, sub, inc and dec which may
be macro-fused with conditional jumps. */
be macro-fused with conditional jumps.
NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
or is one of the following format:
cmp m, imm
add m, imm
sub m, imm
test m, imm
and m, imm
inc m
dec m
it is unfusible. */
static int
maybe_fused_with_jcc_p (void)
maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
{
/* No RIP address. */
if (i.base_reg && i.base_reg->reg_num == RegIP)
@ -8387,36 +8420,54 @@ maybe_fused_with_jcc_p (void)
if (is_any_vex_encoding (&i.tm))
return 0;
/* and, add, sub with destination register. */
if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
|| i.tm.base_opcode <= 5
/* add, sub without add/sub m, imm. */
if (i.tm.base_opcode <= 5
|| (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
|| ((i.tm.base_opcode | 3) == 0x83
&& ((i.tm.extension_opcode | 1) == 0x5
&& (i.tm.extension_opcode == 0x5
|| i.tm.extension_opcode == 0x0)))
return (i.types[1].bitfield.class == Reg
|| i.types[1].bitfield.instance == Accum);
{
*mf_cmp_p = mf_cmp_alu_cmp;
return !(i.mem_operands && i.imm_operands);
}
/* test, cmp with any register. */
/* and without and m, imm. */
if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
|| ((i.tm.base_opcode | 3) == 0x83
&& i.tm.extension_opcode == 0x4))
{
*mf_cmp_p = mf_cmp_test_and;
return !(i.mem_operands && i.imm_operands);
}
/* test without test m imm. */
if ((i.tm.base_opcode | 1) == 0x85
|| (i.tm.base_opcode | 1) == 0xa9
|| ((i.tm.base_opcode | 1) == 0xf7
&& i.tm.extension_opcode == 0)
|| (i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
&& i.tm.extension_opcode == 0))
{
*mf_cmp_p = mf_cmp_test_and;
return !(i.mem_operands && i.imm_operands);
}
/* cmp without cmp m, imm. */
if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
|| ((i.tm.base_opcode | 3) == 0x83
&& (i.tm.extension_opcode == 0x7)))
return (i.types[0].bitfield.class == Reg
|| i.types[0].bitfield.instance == Accum
|| i.types[1].bitfield.class == Reg
|| i.types[1].bitfield.instance == Accum);
{
*mf_cmp_p = mf_cmp_alu_cmp;
return !(i.mem_operands && i.imm_operands);
}
/* inc, dec with any register. */
/* inc, dec without inc/dec m. */
if ((i.tm.cpu_flags.bitfield.cpuno64
&& (i.tm.base_opcode | 0xf) == 0x4f)
|| ((i.tm.base_opcode | 1) == 0xff
&& i.tm.extension_opcode <= 0x1))
return (i.types[0].bitfield.class == Reg
|| i.types[0].bitfield.instance == Accum);
{
*mf_cmp_p = mf_cmp_incdec;
return !i.mem_operands;
}
return 0;
}
@ -8424,7 +8475,7 @@ maybe_fused_with_jcc_p (void)
/* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
static int
add_fused_jcc_padding_frag_p (void)
add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
{
/* NB: Don't work with COND_JUMP86 without i386. */
if (!align_branch_power
@ -8433,7 +8484,7 @@ add_fused_jcc_padding_frag_p (void)
|| !(align_branch & align_branch_fused_bit))
return 0;
if (maybe_fused_with_jcc_p ())
if (maybe_fused_with_jcc_p (mf_cmp_p))
{
if (last_insn.kind == last_insn_other
|| last_insn.seg != now_seg)
@ -8481,7 +8532,8 @@ add_branch_prefix_frag_p (void)
/* Return 1 if a BRANCH_PADDING frag should be generated. */
static int
add_branch_padding_frag_p (enum align_branch_kind *branch_p)
add_branch_padding_frag_p (enum align_branch_kind *branch_p,
enum mf_jcc_kind *mf_jcc_p)
{
int add_padding;
@ -8503,6 +8555,9 @@ add_branch_padding_frag_p (enum align_branch_kind *branch_p)
}
else
{
/* Because J<cc> and JN<cc> share same group in macro-fusible table,
igore the lowest bit. */
*mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
*branch_p = align_branch_jcc;
if ((align_branch & align_branch_jcc_bit))
add_padding = 1;
@ -8573,6 +8628,10 @@ output_insn (void)
offsetT insn_start_off;
fragS *fragP = NULL;
enum align_branch_kind branch = align_branch_none;
/* The initializer is arbitrary just to avoid uninitialized error.
it's actually either assigned in add_branch_padding_frag_p
or never be used. */
enum mf_jcc_kind mf_jcc = mf_jcc_jo;
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
if (IS_ELF && x86_used_note)
@ -8665,7 +8724,7 @@ output_insn (void)
insn_start_frag = frag_now;
insn_start_off = frag_now_fix ();
if (add_branch_padding_frag_p (&branch))
if (add_branch_padding_frag_p (&branch, &mf_jcc))
{
char *p;
/* Branch can be 8 bytes. Leave some room for prefixes. */
@ -8686,6 +8745,7 @@ output_insn (void)
ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
NULL, 0, p);
fragP->tc_frag_data.mf_type = mf_jcc;
fragP->tc_frag_data.branch_type = branch;
fragP->tc_frag_data.max_bytes = max_branch_padding_size;
}
@ -8705,6 +8765,7 @@ output_insn (void)
unsigned char *q;
unsigned int j;
unsigned int prefix;
enum mf_cmp_kind mf_cmp;
if (avoid_fence
&& (i.tm.base_opcode == 0xfaee8
@ -8731,7 +8792,7 @@ output_insn (void)
if (branch)
/* Skip if this is a branch. */
;
else if (add_fused_jcc_padding_frag_p ())
else if (add_fused_jcc_padding_frag_p (&mf_cmp))
{
/* Make room for padding. */
frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
@ -8743,6 +8804,7 @@ output_insn (void)
ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
NULL, 0, p);
fragP->tc_frag_data.mf_type = mf_cmp;
fragP->tc_frag_data.branch_type = align_branch_fused;
fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
}
@ -10948,6 +11010,42 @@ elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
}
#endif
/* Return 1 if a compare-like instruction of kind MF_CMP can be
   macro-fused with a conditional jump of kind MF_JCC, 0 otherwise.
   Any MF_CMP value other than the three known kinds yields 0.

   The decision follows Table 3-2, Macro-Fusible Instructions in
   Haswell Microarchitecture (the table also applies to Skylake and
   Cascadelake):

   |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
   | ------- | ----------- | ------- | -------- |
   |   Jo    |      N      |    N    |     Y    |
   |   Jno   |      N      |    N    |     Y    |
   |  Jc/Jb  |      Y      |    N    |     Y    |
   | Jae/Jnb |      Y      |    N    |     Y    |
   |  Je/Jz  |      Y      |    Y    |     Y    |
   | Jne/Jnz |      Y      |    Y    |     Y    |
   | Jna/Jbe |      Y      |    N    |     Y    |
   | Ja/Jnbe |      Y      |    N    |     Y    |
   |   Js    |      N      |    N    |     Y    |
   |   Jns   |      N      |    N    |     Y    |
   |  Jp/Jpe |      N      |    N    |     Y    |
   | Jnp/Jpo |      N      |    N    |     Y    |
   | Jl/Jnge |      Y      |    Y    |     Y    |
   | Jge/Jnl |      Y      |    Y    |     Y    |
   | Jle/Jng |      Y      |    Y    |     Y    |
   | Jg/Jnle |      Y      |    Y    |     Y    |  */

static int
i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
{
  switch (mf_cmp)
    {
    case mf_cmp_test_and:
      /* TEST/AND column: fusible with every conditional jump.  */
      return 1;

    case mf_cmp_alu_cmp:
      /* ADD/SUB/CMP column: everything except the Jo, Js and Jp
	 groups.  */
      switch (mf_jcc)
	{
	case mf_jcc_jc:
	case mf_jcc_je:
	case mf_jcc_jna:
	case mf_jcc_jl:
	case mf_jcc_jle:
	  return 1;
	default:
	  return 0;
	}

    case mf_cmp_incdec:
      /* INC/DEC column: only the Je, Jl and Jle groups.  */
      switch (mf_jcc)
	{
	case mf_jcc_je:
	case mf_jcc_jl:
	case mf_jcc_jle:
	  return 1;
	default:
	  return 0;
	}

    default:
      return 0;
    }
}
/* Return the next non-empty frag. */
static fragS *
@ -10967,20 +11065,23 @@ i386_next_non_empty_frag (fragS *fragP)
/* Return the next jcc frag after BRANCH_PADDING. */
static fragS *
i386_next_jcc_frag (fragS *fragP)
i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
{
if (!fragP)
fragS *branch_fragP;
if (!pad_fragP)
return NULL;
if (fragP->fr_type == rs_machine_dependent
&& (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
if (pad_fragP->fr_type == rs_machine_dependent
&& (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
== BRANCH_PADDING))
{
fragP = i386_next_non_empty_frag (fragP);
if (fragP->fr_type != rs_machine_dependent)
branch_fragP = i386_next_non_empty_frag (pad_fragP);
if (branch_fragP->fr_type != rs_machine_dependent)
return NULL;
if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == COND_JUMP)
return fragP;
if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
&& i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
pad_fragP->tc_frag_data.mf_type))
return branch_fragP;
}
return NULL;
@ -11025,7 +11126,7 @@ i386_classify_machine_dependent_frag (fragS *fragP)
*/
cmp_fragP = i386_next_non_empty_frag (next_fragP);
pad_fragP = i386_next_non_empty_frag (cmp_fragP);
branch_fragP = i386_next_jcc_frag (pad_fragP);
branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
if (branch_fragP)
{
/* The BRANCH_PADDING frag is merged with the

View File

@ -273,6 +273,7 @@ struct i386_tc_frag_data
unsigned char prefix_length;
unsigned char default_prefix;
unsigned char cmp_size;
unsigned int mf_type : 3;
unsigned int classified : 1;
unsigned int branch_type : 3;
};
@ -299,6 +300,7 @@ struct i386_tc_frag_data
(FRAGP)->tc_frag_data.cmp_size = 0; \
(FRAGP)->tc_frag_data.classified = 0; \
(FRAGP)->tc_frag_data.branch_type = 0; \
(FRAGP)->tc_frag_data.mf_type = 0; \
} \
while (0)

View File

@ -0,0 +1,78 @@
#as: -mbranches-within-32B-boundaries
#objdump: -dw
.*: +file format .*
Disassembly of section .text:
0+ <foo>:
0: 65 a3 01 00 00 00 mov %eax,%gs:0x1
6: 55 push %ebp
7: 55 push %ebp
8: 55 push %ebp
9: 55 push %ebp
a: 89 e5 mov %esp,%ebp
c: 89 7d f8 mov %edi,-0x8\(%ebp\)
f: 89 75 f4 mov %esi,-0xc\(%ebp\)
12: 89 75 f4 mov %esi,-0xc\(%ebp\)
15: 89 75 f4 mov %esi,-0xc\(%ebp\)
18: 89 75 f4 mov %esi,-0xc\(%ebp\)
1b: 89 75 f4 mov %esi,-0xc\(%ebp\)
1e: 39 c5 cmp %eax,%ebp
20: 70 62 jo 84 <foo\+0x84>
22: 89 73 f4 mov %esi,-0xc\(%ebx\)
25: 89 75 f4 mov %esi,-0xc\(%ebp\)
28: 89 7d f8 mov %edi,-0x8\(%ebp\)
2b: 89 75 f4 mov %esi,-0xc\(%ebp\)
2e: 89 75 f4 mov %esi,-0xc\(%ebp\)
31: 89 75 f4 mov %esi,-0xc\(%ebp\)
34: 89 75 f4 mov %esi,-0xc\(%ebp\)
37: 89 75 f4 mov %esi,-0xc\(%ebp\)
3a: 5d pop %ebp
3b: 5d pop %ebp
3c: 5d pop %ebp
3d: 74 45 je 84 <foo\+0x84>
3f: 5d pop %ebp
40: 74 42 je 84 <foo\+0x84>
42: 89 44 24 fc mov %eax,-0x4\(%esp\)
46: 89 75 f4 mov %esi,-0xc\(%ebp\)
49: 89 7d f8 mov %edi,-0x8\(%ebp\)
4c: 89 75 f4 mov %esi,-0xc\(%ebp\)
4f: 89 75 f4 mov %esi,-0xc\(%ebp\)
52: 89 75 f4 mov %esi,-0xc\(%ebp\)
55: 89 75 f4 mov %esi,-0xc\(%ebp\)
58: 89 75 f4 mov %esi,-0xc\(%ebp\)
5b: 5d pop %ebp
5c: eb 2c jmp 8a <foo\+0x8a>
5e: 66 90 xchg %ax,%ax
60: eb 28 jmp 8a <foo\+0x8a>
62: eb 26 jmp 8a <foo\+0x8a>
64: 89 45 fc mov %eax,-0x4\(%ebp\)
67: 89 75 f4 mov %esi,-0xc\(%ebp\)
6a: 89 7d f8 mov %edi,-0x8\(%ebp\)
6d: 5d pop %ebp
6e: 5d pop %ebp
6f: 40 inc %eax
70: 72 12 jb 84 <foo\+0x84>
72: 36 36 89 45 fc ss mov %eax,%ss:-0x4\(%ebp\)
77: 89 75 f4 mov %esi,-0xc\(%ebp\)
7a: 89 7d f8 mov %edi,-0x8\(%ebp\)
7d: 89 75 f4 mov %esi,-0xc\(%ebp\)
80: 21 c3 and %eax,%ebx
82: 7c 06 jl 8a <foo\+0x8a>
84: 8b 45 f4 mov -0xc\(%ebp\),%eax
87: 89 45 fc mov %eax,-0x4\(%ebp\)
8a: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
90: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
96: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
9c: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
a2: 89 75 0c mov %esi,0xc\(%ebp\)
a5: e9 fc ff ff ff jmp a6 <foo\+0xa6>
aa: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
b0: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
b6: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
bc: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
c2: 89 75 00 mov %esi,0x0\(%ebp\)
c5: 74 c3 je 8a <foo\+0x8a>
c7: 74 c1 je 8a <foo\+0x8a>
#pass

View File

@ -0,0 +1,74 @@
.text
.globl foo
.p2align 4
foo:
movl %eax, %gs:0x1
pushl %ebp
pushl %ebp
pushl %ebp
pushl %ebp
movl %esp, %ebp
movl %edi, -8(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
cmp %eax, %ebp
jo .L_2
movl %esi, -12(%ebx)
movl %esi, -12(%ebp)
movl %edi, -8(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
popl %ebp
popl %ebp
popl %ebp
je .L_2
popl %ebp
je .L_2
movl %eax, -4(%esp)
movl %esi, -12(%ebp)
movl %edi, -8(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
movl %esi, -12(%ebp)
popl %ebp
jmp .L_3
jmp .L_3
jmp .L_3
movl %eax, -4(%ebp)
movl %esi, -12(%ebp)
movl %edi, -8(%ebp)
popl %ebp
popl %ebp
inc %eax
jc .L_2
movl %eax, -4(%ebp)
movl %esi, -12(%ebp)
movl %edi, -8(%ebp)
movl %esi, -12(%ebp)
and %eax, %ebx
jl .L_3
.L_2:
movl -12(%ebp), %eax
movl %eax, -4(%ebp)
.L_3:
movl %esi, -1200(%ebp)
movl %esi, -1200(%ebp)
movl %esi, -1200(%ebp)
movl %esi, -1200(%ebp)
movl %esi, 12(%ebp)
jmp bar
movl %esi, -1200(%ebp)
movl %esi, -1200(%ebp)
movl %esi, -1200(%ebp)
movl %esi, -1200(%ebp)
movl %esi, (%ebp)
je .L_3
je .L_3

View File

@ -525,6 +525,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]]
run_dump_test "align-branch-6"
run_dump_test "align-branch-7"
run_dump_test "align-branch-8"
run_dump_test "align-branch-9"
# These tests require support for 8 and 16 bit relocs,
# so we only run them for ELF and COFF targets.
@ -1100,6 +1101,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t
run_dump_test "x86-64-align-branch-6"
run_dump_test "x86-64-align-branch-7"
run_dump_test "x86-64-align-branch-8"
run_dump_test "x86-64-align-branch-9"
if { ![istarget "*-*-aix*"]
&& ![istarget "*-*-beos*"]

View File

@ -0,0 +1,46 @@
#as: -mbranches-within-32B-boundaries
#objdump: -dw
.*: +file format .*
Disassembly of section .text:
0+ <foo>:
0: c1 e9 02 shr \$0x2,%ecx
3: c1 e9 02 shr \$0x2,%ecx
6: c1 e9 02 shr \$0x2,%ecx
9: 89 d1 mov %edx,%ecx
b: 31 c0 xor %eax,%eax
d: c1 e9 02 shr \$0x2,%ecx
10: c1 e9 02 shr \$0x2,%ecx
13: c1 e9 02 shr \$0x2,%ecx
16: c1 e9 02 shr \$0x2,%ecx
19: c1 e9 02 shr \$0x2,%ecx
1c: c1 e9 02 shr \$0x2,%ecx
1f: 80 fa 02 cmp \$0x2,%dl
22: 70 df jo 3 <foo\+0x3>
24: 2e 2e 2e 2e 31 c0 cs cs cs cs xor %eax,%eax
2a: c1 e9 02 shr \$0x2,%ecx
2d: c1 e9 02 shr \$0x2,%ecx
30: c1 e9 02 shr \$0x2,%ecx
33: 89 d1 mov %edx,%ecx
35: 31 c0 xor %eax,%eax
37: c1 e9 02 shr \$0x2,%ecx
3a: c1 e9 02 shr \$0x2,%ecx
3d: c1 e9 02 shr \$0x2,%ecx
40: f6 c2 02 test \$0x2,%dl
43: 75 e8 jne 2d <foo\+0x2d>
45: 31 c0 xor %eax,%eax
47: c1 e9 02 shr \$0x2,%ecx
4a: c1 e9 02 shr \$0x2,%ecx
4d: 89 d1 mov %edx,%ecx
4f: c1 e9 02 shr \$0x2,%ecx
52: c1 e9 02 shr \$0x2,%ecx
55: 89 d1 mov %edx,%ecx
57: c1 e9 02 shr \$0x2,%ecx
5a: 89 d1 mov %edx,%ecx
5c: 31 c0 xor %eax,%eax
5e: ff c0 inc %eax
60: 76 cb jbe 2d <foo\+0x2d>
62: 31 c0 xor %eax,%eax
#pass

View File

@ -0,0 +1,43 @@
.text
.p2align 4,,15
foo:
shrl $2, %ecx
.L1:
shrl $2, %ecx
shrl $2, %ecx
movl %edx, %ecx
xorl %eax, %eax
shrl $2, %ecx
shrl $2, %ecx
shrl $2, %ecx
shrl $2, %ecx
shrl $2, %ecx
shrl $2, %ecx
cmpb $2, %dl
jo .L1
xorl %eax, %eax
shrl $2, %ecx
.L2:
shrl $2, %ecx
shrl $2, %ecx
movl %edx, %ecx
xorl %eax, %eax
shrl $2, %ecx
shrl $2, %ecx
shrl $2, %ecx
testb $2, %dl
jne .L2
xorl %eax, %eax
.L3:
shrl $2, %ecx
shrl $2, %ecx
movl %edx, %ecx
shrl $2, %ecx
shrl $2, %ecx
movl %edx, %ecx
shrl $2, %ecx
movl %edx, %ecx
xorl %eax, %eax
inc %eax
jbe .L2
xorl %eax, %eax