Handle long jumps for .code16 and .arch < 386 by using a two

instruction sequence consisting of a conditional jump of the
opposite sense around an unconditional jump to the target.
Add jumps/nojumps .arch modifier.
This commit is contained in:
Alan Modra 2001-02-13 12:44:19 +00:00
parent c90cedf93e
commit fddf5b5bc2
3 changed files with 193 additions and 77 deletions

View File

@ -1,5 +1,25 @@
2001-02-13 Alan Modra <alan@linuxcare.com.au>
* doc/c-i386.texi (i386-Arch): Add "jumps"/"nojumps" blurb.
Mention effect of < 386 architectures on jump promotion.
(i386-Jumps): xref above. Don't assume long disp is 32 bits.
* config/tc-i386.c (no_cond_jump_promotion): New.
(set_cpu_arch): Parse "jumps" arch modifier.
(insn_size): Modify usage comment.
(ENCODE_RELAX_STATE): Reformat and protect macro arg.
(SIZE_FROM_RELAX_STATE): Rename to DISP_SIZE_FROM_RELAX_STATE.
(TYPE_FROM_RELAX_STATE): New define.
(UNCOND_JUMP, COND_JUMP): Renumber.
(md_relax_table): Reorder to suit.
(COND_JUMP86): New define.
(md_relax_table): Handle COND_JUMP86 cases. Add a few comments.
(md_assemble): Create frag var for jumps of max size, encode relax
state for COND_JUMP86.
(md_estimate_size_before_relax): Handle COND_JUMP86 cases, and
leave conditional jumps small if no_cond_jump_promotion.
(md_convert_frag): Likewise.
* expr.c (operator): Don't bump input_line_pointer for two char
operators. Instead return operator size via new param num_chars.
(expr): Use above to parse multi-char operators correctly.

View File

@ -273,15 +273,20 @@ static const char *cpu_arch_name = NULL;
/* CPU feature flags. */
static unsigned int cpu_arch_flags = CpuUnknownFlags|CpuNo64;
/* If set, conditional jumps are not automatically promoted to handle
larger than a byte offset. */
static unsigned int no_cond_jump_promotion = 0;
/* Interface to relax_segment.
There are 2 relax states for 386 jump insns: one for conditional &
one for unconditional jumps. This is because these two types of
jumps add different sizes to frags when we're figuring out what
sort of jump to choose to reach a given label. */
There are 3 major relax states for 386 jump insns because the
different types of jumps add different sizes to frags when we're
figuring out what sort of jump to choose to reach a given label. */
/* Types. */
#define COND_JUMP 1
#define UNCOND_JUMP 2
#define UNCOND_JUMP 1
#define COND_JUMP 2
#define COND_JUMP86 3
/* Sizes. */
#define CODE16 1
#define SMALL 0
@ -297,10 +302,12 @@ static unsigned int cpu_arch_flags = CpuUnknownFlags|CpuNo64;
#endif
#endif
#define ENCODE_RELAX_STATE(type,size) \
((relax_substateT) ((type<<2) | (size)))
#define SIZE_FROM_RELAX_STATE(s) \
( (((s) & 0x3) == BIG ? 4 : (((s) & 0x3) == BIG16 ? 2 : 1)) )
#define ENCODE_RELAX_STATE(type, size) \
((relax_substateT) (((type) << 2) | (size)))
#define TYPE_FROM_RELAX_STATE(s) \
((s) >> 2)
#define DISP_SIZE_FROM_RELAX_STATE(s) \
((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
/* This table is used by relax_frag to promote short jumps to long
ones where necessary. SMALL (short) jumps may be promoted to BIG
@ -322,6 +329,17 @@ const relax_typeS md_relax_table[] =
{1, 1, 0, 0},
{1, 1, 0, 0},
/* UNCOND_JUMP states. */
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
/* dword jmp adds 3 bytes to frag:
0 extra opcode bytes, 3 extra displacement bytes. */
{0, 0, 3, 0},
/* word jmp adds 1 byte to frag:
0 extra opcode bytes, 1 extra displacement byte. */
{0, 0, 1, 0},
/* COND_JUMP states. */
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
/* dword conditionals adds 4 bytes to frag:
@ -331,15 +349,15 @@ const relax_typeS md_relax_table[] =
1 extra opcode byte, 1 extra displacement byte. */
{0, 0, 2, 0},
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
/* dword jmp adds 3 bytes to frag:
0 extra opcode bytes, 3 extra displacement bytes. */
{0, 0, 3, 0},
/* word jmp adds 1 byte to frag:
0 extra opcode bytes, 1 extra displacement byte. */
{0, 0, 1, 0}
/* COND_JUMP86 states. */
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
/* dword conditionals adds 4 bytes to frag:
1 extra opcode byte, 3 extra displacement bytes. */
{0, 0, 4, 0},
/* word conditionals add 3 bytes to frag:
1 extra opcode byte, 2 extra displacement bytes. */
{0, 0, 3, 0}
};
static const arch_entry cpu_arch[] = {
@ -726,7 +744,8 @@ set_cpu_arch (dummy)
if (strcmp (string, cpu_arch[i].name) == 0)
{
cpu_arch_name = cpu_arch[i].name;
cpu_arch_flags = cpu_arch[i].flags | (flag_code == CODE_64BIT ? Cpu64 : CpuNo64);
cpu_arch_flags = (cpu_arch[i].flags
| (flag_code == CODE_64BIT ? Cpu64 : CpuNo64));
break;
}
}
@ -738,6 +757,23 @@ set_cpu_arch (dummy)
else
as_bad (_("missing cpu architecture"));
no_cond_jump_promotion = 0;
if (*input_line_pointer == ','
&& ! is_end_of_line[(unsigned char) input_line_pointer[1]])
{
char *string = ++input_line_pointer;
int e = get_symbol_end ();
if (strcmp (string, "nojumps") == 0)
no_cond_jump_promotion = 1;
else if (strcmp (string, "jumps") == 0)
;
else
as_bad (_("no such architecture modifier: `%s'"), string);
*input_line_pointer = e;
}
demand_empty_rest_of_line ();
}
@ -1197,7 +1233,8 @@ md_assemble (line)
/* Points to template once we've found it. */
const template *t;
/* Count the size of the instruction generated. */
/* Count the size of the instruction generated. Does not include
variable part of jump insns before relax. */
int insn_size = 0;
int j;
@ -2671,7 +2708,6 @@ md_assemble (line)
/* Output jumps. */
if (i.tm.opcode_modifier & Jump)
{
int size;
int code16;
int prefix;
@ -2692,10 +2728,6 @@ md_assemble (line)
i.prefixes--;
}
size = 4;
if (code16)
size = 2;
if (i.prefixes != 0 && !intel_syntax)
as_warn (_("skipping prefixes on this instruction"));
@ -2704,7 +2736,7 @@ md_assemble (line)
instruction we may generate in md_convert_frag. This is 2
bytes for the opcode and room for the prefix and largest
displacement. */
frag_grow (prefix + 2 + size);
frag_grow (prefix + 2 + 4);
insn_size += prefix + 1;
/* Prefix and 1 opcode byte go in fr_fix. */
p = frag_more (prefix + 1);
@ -2716,11 +2748,13 @@ md_assemble (line)
/* 1 possible extra opcode + displacement go in var part.
Pass reloc in fr_var. */
frag_var (rs_machine_dependent,
1 + size,
1 + 4,
i.disp_reloc[0],
((unsigned char) *p == JUMP_PC_RELATIVE
? ENCODE_RELAX_STATE (UNCOND_JUMP, SMALL) | code16
: ENCODE_RELAX_STATE (COND_JUMP, SMALL) | code16),
: ((cpu_arch_flags & Cpu386) != 0
? ENCODE_RELAX_STATE (COND_JUMP, SMALL) | code16
: ENCODE_RELAX_STATE (COND_JUMP86, SMALL) | code16)),
i.op[0].disps->X_add_symbol,
i.op[0].disps->X_add_number,
p);
@ -3909,10 +3943,10 @@ md_estimate_size_before_relax (fragP, segment)
old_fr_fix = fragP->fr_fix;
opcode = (unsigned char *) fragP->fr_opcode;
switch (opcode[0])
switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
{
case JUMP_PC_RELATIVE:
/* Make jmp (0xeb) a dword displacement jump. */
case UNCOND_JUMP:
/* Make jmp (0xeb) a (d)word displacement jump. */
opcode[0] = 0xe9;
fragP->fr_fix += size;
fix_new (fragP, old_fr_fix, size,
@ -3921,9 +3955,33 @@ md_estimate_size_before_relax (fragP, segment)
reloc_type);
break;
default:
case COND_JUMP86:
if (no_cond_jump_promotion)
return 1;
if (size == 2)
{
/* Negate the condition, and branch past an
unconditional jump. */
opcode[0] ^= 1;
opcode[1] = 3;
/* Insert an unconditional jump. */
opcode[2] = 0xe9;
/* We added two extra opcode bytes, and have a two byte
offset. */
fragP->fr_fix += 2 + 2;
fix_new (fragP, old_fr_fix + 2, 2,
fragP->fr_symbol,
fragP->fr_offset, 1,
reloc_type);
break;
}
/* Fall through. */
case COND_JUMP:
if (no_cond_jump_promotion)
return 1;
/* This changes the byte-displacement jump 0x7N
to the dword-displacement jump 0x0f,0x8N. */
to the (d)word-displacement jump 0x0f,0x8N. */
opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
/* We've added an opcode byte. */
@ -3933,6 +3991,10 @@ md_estimate_size_before_relax (fragP, segment)
fragP->fr_offset, 1,
reloc_type);
break;
default:
BAD_CASE (fragP->fr_subtype);
break;
}
frag_wane (fragP);
return fragP->fr_fix - old_fr_fix;
@ -3986,51 +4048,65 @@ md_convert_frag (abfd, sec, fragP)
/* Displacement from opcode start to fill into instruction. */
displacement_from_opcode_start = target_address - opcode_address;
switch (fragP->fr_subtype)
if ((fragP->fr_subtype & BIG) == 0)
{
case ENCODE_RELAX_STATE (COND_JUMP, SMALL):
case ENCODE_RELAX_STATE (COND_JUMP, SMALL16):
case ENCODE_RELAX_STATE (UNCOND_JUMP, SMALL):
case ENCODE_RELAX_STATE (UNCOND_JUMP, SMALL16):
/* Don't have to change opcode. */
extension = 1; /* 1 opcode + 1 displacement */
where_to_put_displacement = &opcode[1];
break;
case ENCODE_RELAX_STATE (COND_JUMP, BIG):
extension = 5; /* 2 opcode + 4 displacement */
opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
where_to_put_displacement = &opcode[2];
break;
case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
extension = 4; /* 1 opcode + 4 displacement */
opcode[0] = 0xe9;
where_to_put_displacement = &opcode[1];
break;
case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
extension = 3; /* 2 opcode + 2 displacement */
opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
where_to_put_displacement = &opcode[2];
break;
case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
extension = 2; /* 1 opcode + 2 displacement */
opcode[0] = 0xe9;
where_to_put_displacement = &opcode[1];
break;
default:
BAD_CASE (fragP->fr_subtype);
break;
}
else
{
if (no_cond_jump_promotion
&& TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
as_warn_where (fragP->fr_file, fragP->fr_line, _("long jump required"));
switch (fragP->fr_subtype)
{
case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
extension = 4; /* 1 opcode + 4 displacement */
opcode[0] = 0xe9;
where_to_put_displacement = &opcode[1];
break;
case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
extension = 2; /* 1 opcode + 2 displacement */
opcode[0] = 0xe9;
where_to_put_displacement = &opcode[1];
break;
case ENCODE_RELAX_STATE (COND_JUMP, BIG):
case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
extension = 5; /* 2 opcode + 4 displacement */
opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
where_to_put_displacement = &opcode[2];
break;
case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
extension = 3; /* 2 opcode + 2 displacement */
opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
where_to_put_displacement = &opcode[2];
break;
case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
extension = 4;
opcode[0] ^= 1;
opcode[1] = 3;
opcode[2] = 0xe9;
where_to_put_displacement = &opcode[3];
break;
default:
BAD_CASE (fragP->fr_subtype);
break;
}
}
/* Now put displacement after opcode. */
md_number_to_chars ((char *) where_to_put_displacement,
(valueT) (displacement_from_opcode_start - extension),
SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
fragP->fr_fix += extension;
}

View File

@ -26,7 +26,7 @@ extending the Intel architecture to 64-bits.
* i386-Regs:: Register Naming
* i386-Prefixes:: Instruction Prefixes
* i386-Memory:: Memory References
* i386-jumps:: Handling of Jump Instructions
* i386-Jumps:: Handling of Jump Instructions
* i386-Float:: Floating Point
* i386-SIMD:: Intel's MMX and AMD's 3DNow! SIMD Operations
* i386-16bit:: Writing 16-bit Code
@ -488,7 +488,7 @@ the default absolute addressing.
Other addressing modes remain unchanged in x86-64 architecture, except
registers used are 64-bit instead of 32-bit.
@node i386-jumps
@node i386-Jumps
@section Handling of Jump Instructions
@cindex jump optimization, i386
@ -498,11 +498,11 @@ registers used are 64-bit instead of 32-bit.
Jump instructions are always optimized to use the smallest possible
displacements. This is accomplished by using byte (8-bit) displacement
jumps whenever the target is sufficiently close. If a byte displacement
is insufficient a long (32-bit) displacement is used. We do not support
is insufficient a long displacement is used. We do not support
word (16-bit) displacement jumps in 32-bit mode (i.e. prefixing the jump
instruction with the @samp{data16} instruction prefix), since the 80386
insists upon masking @samp{%eip} to 16 bits after the word displacement
is added.
is added. (See also @pxref{i386-Arch})
Note that the @samp{jcxz}, @samp{jecxz}, @samp{loop}, @samp{loopz},
@samp{loope}, @samp{loopnz} and @samp{loopne} instructions only come in byte
@ -696,13 +696,33 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{sledgehammer}
@end multitable
Apart from the warning, there is only one other effect on
@code{@value{AS}} operation; If you specify a CPU other than
Apart from the warning, there are only two other effects on
@code{@value{AS}} operation; Firstly, if you specify a CPU other than
@samp{i486}, then shift by one instructions such as @samp{sarl $1, %eax}
will automatically use a two byte opcode sequence. The larger three
byte opcode sequence is used on the 486 (and when no architecture is
specified) because it executes faster on the 486. Note that you can
explicitly request the two byte opcode by writing @samp{sarl %eax}.
Secondly, if you specify @samp{i8086}, @samp{i186}, or @samp{i286},
@emph{and} @samp{.code16} or @samp{.code16gcc} then byte offset
conditional jumps will be promoted when necessary to a two instruction
sequence consisting of a conditional jump of the opposite sense around
an unconditional jump to the target.
Following the CPU architecture, you may specify @samp{jumps} or
@samp{nojumps} to control automatic promotion of conditional jumps.
@samp{jumps} is the default, and enables jump promotion; All external
jumps will be of the long variety, and file-local jumps will be promoted
as necessary. (@pxref{i386-Jumps}) @samp{nojumps} leaves external
conditional jumps as byte offset jumps, and warns about file-local
conditional jumps that @code{@value{AS}} promotes.
Unconditional jumps are treated as for @samp{jumps}.
For example
@smallexample
.arch i8086,nojumps
@end smallexample
@node i386-Notes
@section Notes