Handle long jumps for .code16 and .arch < 386 by using a two-instruction
sequence consisting of a conditional jump of the opposite sense around
an unconditional jump to the target.
Add jumps/nojumps .arch modifier.
This commit is contained in:
Alan Modra 2001-02-13 12:44:19 +00:00
parent c90cedf93e
commit fddf5b5bc2
3 changed files with 193 additions and 77 deletions

View File

@ -1,5 +1,25 @@
2001-02-13 Alan Modra <alan@linuxcare.com.au> 2001-02-13 Alan Modra <alan@linuxcare.com.au>
* doc/c-i386.texi (i386-Arch): Add "jumps"/"nojumps" blurb.
Mention effect of < 386 architectures on jump promotion.
(i386-Jumps): xref above. Don't assume long disp is 32 bits.
* config/tc-i386.c (no_cond_jump_promotion): New.
(set_cpu_arch): Parse "jumps" arch modifier.
(insn_size): Modify usage comment.
(ENCODE_RELAX_STATE): Reformat and protect macro arg.
(SIZE_FROM_RELAX_STATE): Rename to DISP_SIZE_FROM_RELAX_STATE.
(TYPE_FROM_RELAX_STATE): New define.
(UNCOND_JUMP, COND_JUMP): Renumber.
(md_relax_table): Reorder to suit.
(COND_JUMP86): New define.
(md_relax_table): Handle COND_JUMP86 cases. Add a few comments.
(md_assemble): Create frag var for jumps of max size, encode relax
state for COND_JUMP86.
(md_estimate_size_before_relax): Handle COND_JUMP86 cases, and
leave conditional jumps small if no_cond_jump_promotion.
(md_convert_frag): Likewise.
* expr.c (operator): Don't bump input_line_pointer for two char * expr.c (operator): Don't bump input_line_pointer for two char
operators. Instead return operator size via new param num_chars. operators. Instead return operator size via new param num_chars.
(expr): Use above to parse multi-char operators correctly. (expr): Use above to parse multi-char operators correctly.

View File

@ -273,15 +273,20 @@ static const char *cpu_arch_name = NULL;
/* CPU feature flags. */ /* CPU feature flags. */
static unsigned int cpu_arch_flags = CpuUnknownFlags|CpuNo64; static unsigned int cpu_arch_flags = CpuUnknownFlags|CpuNo64;
/* If set, conditional jumps are not automatically promoted to handle
offsets larger than a byte. */
static unsigned int no_cond_jump_promotion = 0;
/* Interface to relax_segment. /* Interface to relax_segment.
There are 2 relax states for 386 jump insns: one for conditional & There are 3 major relax states for 386 jump insns because the
one for unconditional jumps. This is because these two types of different types of jumps add different sizes to frags when we're
jumps add different sizes to frags when we're figuring out what figuring out what sort of jump to choose to reach a given label. */
sort of jump to choose to reach a given label. */
/* Types. */ /* Types. */
#define COND_JUMP 1 #define UNCOND_JUMP 1
#define UNCOND_JUMP 2 #define COND_JUMP 2
#define COND_JUMP86 3
/* Sizes. */ /* Sizes. */
#define CODE16 1 #define CODE16 1
#define SMALL 0 #define SMALL 0
@ -297,10 +302,12 @@ static unsigned int cpu_arch_flags = CpuUnknownFlags|CpuNo64;
#endif #endif
#endif #endif
#define ENCODE_RELAX_STATE(type,size) \ #define ENCODE_RELAX_STATE(type, size) \
((relax_substateT) ((type<<2) | (size))) ((relax_substateT) (((type) << 2) | (size)))
#define SIZE_FROM_RELAX_STATE(s) \ #define TYPE_FROM_RELAX_STATE(s) \
( (((s) & 0x3) == BIG ? 4 : (((s) & 0x3) == BIG16 ? 2 : 1)) ) ((s) >> 2)
#define DISP_SIZE_FROM_RELAX_STATE(s) \
((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
/* This table is used by relax_frag to promote short jumps to long /* This table is used by relax_frag to promote short jumps to long
ones where necessary. SMALL (short) jumps may be promoted to BIG ones where necessary. SMALL (short) jumps may be promoted to BIG
@ -322,6 +329,17 @@ const relax_typeS md_relax_table[] =
{1, 1, 0, 0}, {1, 1, 0, 0},
{1, 1, 0, 0}, {1, 1, 0, 0},
/* UNCOND_JUMP states. */
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
/* dword jmp adds 3 bytes to frag:
0 extra opcode bytes, 3 extra displacement bytes. */
{0, 0, 3, 0},
/* word jmp adds 1 byte to frag:
0 extra opcode bytes, 1 extra displacement byte. */
{0, 0, 1, 0},
/* COND_JUMP states. */
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP, BIG)}, {127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP, BIG16)}, {127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
/* dword conditionals adds 4 bytes to frag: /* dword conditionals adds 4 bytes to frag:
@ -331,15 +349,15 @@ const relax_typeS md_relax_table[] =
1 extra opcode byte, 1 extra displacement byte. */ 1 extra opcode byte, 1 extra displacement byte. */
{0, 0, 2, 0}, {0, 0, 2, 0},
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)}, /* COND_JUMP86 states. */
{127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)}, {127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
/* dword jmp adds 3 bytes to frag: {127 + 1, -128 + 1, 0, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
0 extra opcode bytes, 3 extra displacement bytes. */ /* dword conditionals adds 4 bytes to frag:
{0, 0, 3, 0}, 1 extra opcode byte, 3 extra displacement bytes. */
/* word jmp adds 1 byte to frag: {0, 0, 4, 0},
0 extra opcode bytes, 1 extra displacement byte. */ /* word conditionals add 3 bytes to frag:
{0, 0, 1, 0} 1 extra opcode byte, 2 extra displacement bytes. */
{0, 0, 3, 0}
}; };
static const arch_entry cpu_arch[] = { static const arch_entry cpu_arch[] = {
@ -726,7 +744,8 @@ set_cpu_arch (dummy)
if (strcmp (string, cpu_arch[i].name) == 0) if (strcmp (string, cpu_arch[i].name) == 0)
{ {
cpu_arch_name = cpu_arch[i].name; cpu_arch_name = cpu_arch[i].name;
cpu_arch_flags = cpu_arch[i].flags | (flag_code == CODE_64BIT ? Cpu64 : CpuNo64); cpu_arch_flags = (cpu_arch[i].flags
| (flag_code == CODE_64BIT ? Cpu64 : CpuNo64));
break; break;
} }
} }
@ -738,6 +757,23 @@ set_cpu_arch (dummy)
else else
as_bad (_("missing cpu architecture")); as_bad (_("missing cpu architecture"));
no_cond_jump_promotion = 0;
if (*input_line_pointer == ','
&& ! is_end_of_line[(unsigned char) input_line_pointer[1]])
{
char *string = ++input_line_pointer;
int e = get_symbol_end ();
if (strcmp (string, "nojumps") == 0)
no_cond_jump_promotion = 1;
else if (strcmp (string, "jumps") == 0)
;
else
as_bad (_("no such architecture modifier: `%s'"), string);
*input_line_pointer = e;
}
demand_empty_rest_of_line (); demand_empty_rest_of_line ();
} }
@ -1197,7 +1233,8 @@ md_assemble (line)
/* Points to template once we've found it. */ /* Points to template once we've found it. */
const template *t; const template *t;
/* Count the size of the instruction generated. */ /* Count the size of the instruction generated. Does not include
variable part of jump insns before relax. */
int insn_size = 0; int insn_size = 0;
int j; int j;
@ -2671,7 +2708,6 @@ md_assemble (line)
/* Output jumps. */ /* Output jumps. */
if (i.tm.opcode_modifier & Jump) if (i.tm.opcode_modifier & Jump)
{ {
int size;
int code16; int code16;
int prefix; int prefix;
@ -2692,10 +2728,6 @@ md_assemble (line)
i.prefixes--; i.prefixes--;
} }
size = 4;
if (code16)
size = 2;
if (i.prefixes != 0 && !intel_syntax) if (i.prefixes != 0 && !intel_syntax)
as_warn (_("skipping prefixes on this instruction")); as_warn (_("skipping prefixes on this instruction"));
@ -2704,7 +2736,7 @@ md_assemble (line)
instruction we may generate in md_convert_frag. This is 2 instruction we may generate in md_convert_frag. This is 2
bytes for the opcode and room for the prefix and largest bytes for the opcode and room for the prefix and largest
displacement. */ displacement. */
frag_grow (prefix + 2 + size); frag_grow (prefix + 2 + 4);
insn_size += prefix + 1; insn_size += prefix + 1;
/* Prefix and 1 opcode byte go in fr_fix. */ /* Prefix and 1 opcode byte go in fr_fix. */
p = frag_more (prefix + 1); p = frag_more (prefix + 1);
@ -2716,11 +2748,13 @@ md_assemble (line)
/* 1 possible extra opcode + displacement go in var part. /* 1 possible extra opcode + displacement go in var part.
Pass reloc in fr_var. */ Pass reloc in fr_var. */
frag_var (rs_machine_dependent, frag_var (rs_machine_dependent,
1 + size, 1 + 4,
i.disp_reloc[0], i.disp_reloc[0],
((unsigned char) *p == JUMP_PC_RELATIVE ((unsigned char) *p == JUMP_PC_RELATIVE
? ENCODE_RELAX_STATE (UNCOND_JUMP, SMALL) | code16 ? ENCODE_RELAX_STATE (UNCOND_JUMP, SMALL) | code16
: ENCODE_RELAX_STATE (COND_JUMP, SMALL) | code16), : ((cpu_arch_flags & Cpu386) != 0
? ENCODE_RELAX_STATE (COND_JUMP, SMALL) | code16
: ENCODE_RELAX_STATE (COND_JUMP86, SMALL) | code16)),
i.op[0].disps->X_add_symbol, i.op[0].disps->X_add_symbol,
i.op[0].disps->X_add_number, i.op[0].disps->X_add_number,
p); p);
@ -3909,10 +3943,10 @@ md_estimate_size_before_relax (fragP, segment)
old_fr_fix = fragP->fr_fix; old_fr_fix = fragP->fr_fix;
opcode = (unsigned char *) fragP->fr_opcode; opcode = (unsigned char *) fragP->fr_opcode;
switch (opcode[0]) switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
{ {
case JUMP_PC_RELATIVE: case UNCOND_JUMP:
/* Make jmp (0xeb) a dword displacement jump. */ /* Make jmp (0xeb) a (d)word displacement jump. */
opcode[0] = 0xe9; opcode[0] = 0xe9;
fragP->fr_fix += size; fragP->fr_fix += size;
fix_new (fragP, old_fr_fix, size, fix_new (fragP, old_fr_fix, size,
@ -3921,9 +3955,33 @@ md_estimate_size_before_relax (fragP, segment)
reloc_type); reloc_type);
break; break;
default: case COND_JUMP86:
if (no_cond_jump_promotion)
return 1;
if (size == 2)
{
/* Negate the condition, and branch past an
unconditional jump. */
opcode[0] ^= 1;
opcode[1] = 3;
/* Insert an unconditional jump. */
opcode[2] = 0xe9;
/* We added two extra opcode bytes, and have a two byte
offset. */
fragP->fr_fix += 2 + 2;
fix_new (fragP, old_fr_fix + 2, 2,
fragP->fr_symbol,
fragP->fr_offset, 1,
reloc_type);
break;
}
/* Fall through. */
case COND_JUMP:
if (no_cond_jump_promotion)
return 1;
/* This changes the byte-displacement jump 0x7N /* This changes the byte-displacement jump 0x7N
to the dword-displacement jump 0x0f,0x8N. */ to the (d)word-displacement jump 0x0f,0x8N. */
opcode[1] = opcode[0] + 0x10; opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE; opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
/* We've added an opcode byte. */ /* We've added an opcode byte. */
@ -3933,6 +3991,10 @@ md_estimate_size_before_relax (fragP, segment)
fragP->fr_offset, 1, fragP->fr_offset, 1,
reloc_type); reloc_type);
break; break;
default:
BAD_CASE (fragP->fr_subtype);
break;
} }
frag_wane (fragP); frag_wane (fragP);
return fragP->fr_fix - old_fr_fix; return fragP->fr_fix - old_fr_fix;
@ -3986,51 +4048,65 @@ md_convert_frag (abfd, sec, fragP)
/* Displacement from opcode start to fill into instruction. */ /* Displacement from opcode start to fill into instruction. */
displacement_from_opcode_start = target_address - opcode_address; displacement_from_opcode_start = target_address - opcode_address;
switch (fragP->fr_subtype) if ((fragP->fr_subtype & BIG) == 0)
{ {
case ENCODE_RELAX_STATE (COND_JUMP, SMALL):
case ENCODE_RELAX_STATE (COND_JUMP, SMALL16):
case ENCODE_RELAX_STATE (UNCOND_JUMP, SMALL):
case ENCODE_RELAX_STATE (UNCOND_JUMP, SMALL16):
/* Don't have to change opcode. */ /* Don't have to change opcode. */
extension = 1; /* 1 opcode + 1 displacement */ extension = 1; /* 1 opcode + 1 displacement */
where_to_put_displacement = &opcode[1]; where_to_put_displacement = &opcode[1];
break;
case ENCODE_RELAX_STATE (COND_JUMP, BIG):
extension = 5; /* 2 opcode + 4 displacement */
opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
where_to_put_displacement = &opcode[2];
break;
case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
extension = 4; /* 1 opcode + 4 displacement */
opcode[0] = 0xe9;
where_to_put_displacement = &opcode[1];
break;
case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
extension = 3; /* 2 opcode + 2 displacement */
opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
where_to_put_displacement = &opcode[2];
break;
case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
extension = 2; /* 1 opcode + 2 displacement */
opcode[0] = 0xe9;
where_to_put_displacement = &opcode[1];
break;
default:
BAD_CASE (fragP->fr_subtype);
break;
} }
else
{
if (no_cond_jump_promotion
&& TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
as_warn_where (fragP->fr_file, fragP->fr_line, _("long jump required"));
switch (fragP->fr_subtype)
{
case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
extension = 4; /* 1 opcode + 4 displacement */
opcode[0] = 0xe9;
where_to_put_displacement = &opcode[1];
break;
case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
extension = 2; /* 1 opcode + 2 displacement */
opcode[0] = 0xe9;
where_to_put_displacement = &opcode[1];
break;
case ENCODE_RELAX_STATE (COND_JUMP, BIG):
case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
extension = 5; /* 2 opcode + 4 displacement */
opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
where_to_put_displacement = &opcode[2];
break;
case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
extension = 3; /* 2 opcode + 2 displacement */
opcode[1] = opcode[0] + 0x10;
opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
where_to_put_displacement = &opcode[2];
break;
case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
extension = 4;
opcode[0] ^= 1;
opcode[1] = 3;
opcode[2] = 0xe9;
where_to_put_displacement = &opcode[3];
break;
default:
BAD_CASE (fragP->fr_subtype);
break;
}
}
/* Now put displacement after opcode. */ /* Now put displacement after opcode. */
md_number_to_chars ((char *) where_to_put_displacement, md_number_to_chars ((char *) where_to_put_displacement,
(valueT) (displacement_from_opcode_start - extension), (valueT) (displacement_from_opcode_start - extension),
SIZE_FROM_RELAX_STATE (fragP->fr_subtype)); DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
fragP->fr_fix += extension; fragP->fr_fix += extension;
} }

View File

@ -26,7 +26,7 @@ extending the Intel architecture to 64-bits.
* i386-Regs:: Register Naming * i386-Regs:: Register Naming
* i386-Prefixes:: Instruction Prefixes * i386-Prefixes:: Instruction Prefixes
* i386-Memory:: Memory References * i386-Memory:: Memory References
* i386-jumps:: Handling of Jump Instructions * i386-Jumps:: Handling of Jump Instructions
* i386-Float:: Floating Point * i386-Float:: Floating Point
* i386-SIMD:: Intel's MMX and AMD's 3DNow! SIMD Operations * i386-SIMD:: Intel's MMX and AMD's 3DNow! SIMD Operations
* i386-16bit:: Writing 16-bit Code * i386-16bit:: Writing 16-bit Code
@ -488,7 +488,7 @@ the default absolute addressing.
Other addressing modes remain unchanged in x86-64 architecture, except Other addressing modes remain unchanged in x86-64 architecture, except
registers used are 64-bit instead of 32-bit. registers used are 64-bit instead of 32-bit.
@node i386-jumps @node i386-Jumps
@section Handling of Jump Instructions @section Handling of Jump Instructions
@cindex jump optimization, i386 @cindex jump optimization, i386
@ -498,11 +498,11 @@ registers used are 64-bit instead of 32-bit.
Jump instructions are always optimized to use the smallest possible Jump instructions are always optimized to use the smallest possible
displacements. This is accomplished by using byte (8-bit) displacement displacements. This is accomplished by using byte (8-bit) displacement
jumps whenever the target is sufficiently close. If a byte displacement jumps whenever the target is sufficiently close. If a byte displacement
is insufficient a long (32-bit) displacement is used. We do not support is insufficient a long displacement is used. We do not support
word (16-bit) displacement jumps in 32-bit mode (i.e. prefixing the jump word (16-bit) displacement jumps in 32-bit mode (i.e. prefixing the jump
instruction with the @samp{data16} instruction prefix), since the 80386 instruction with the @samp{data16} instruction prefix), since the 80386
insists upon masking @samp{%eip} to 16 bits after the word displacement insists upon masking @samp{%eip} to 16 bits after the word displacement
is added. is added. (@pxref{i386-Arch})
Note that the @samp{jcxz}, @samp{jecxz}, @samp{loop}, @samp{loopz}, Note that the @samp{jcxz}, @samp{jecxz}, @samp{loop}, @samp{loopz},
@samp{loope}, @samp{loopnz} and @samp{loopne} instructions only come in byte @samp{loope}, @samp{loopnz} and @samp{loopne} instructions only come in byte
@ -696,13 +696,33 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{sledgehammer} @item @samp{sledgehammer}
@end multitable @end multitable
Apart from the warning, there is only one other effect on Apart from the warning, there are only two other effects on
@code{@value{AS}} operation; If you specify a CPU other than @code{@value{AS}} operation; Firstly, if you specify a CPU other than
@samp{i486}, then shift by one instructions such as @samp{sarl $1, %eax} @samp{i486}, then shift by one instructions such as @samp{sarl $1, %eax}
will automatically use a two byte opcode sequence. The larger three will automatically use a two byte opcode sequence. The larger three
byte opcode sequence is used on the 486 (and when no architecture is byte opcode sequence is used on the 486 (and when no architecture is
specified) because it executes faster on the 486. Note that you can specified) because it executes faster on the 486. Note that you can
explicitly request the two byte opcode by writing @samp{sarl %eax}. explicitly request the two byte opcode by writing @samp{sarl %eax}.
Secondly, if you specify @samp{i8086}, @samp{i186}, or @samp{i286},
@emph{and} @samp{.code16} or @samp{.code16gcc}, then byte offset
conditional jumps will be promoted when necessary to a two-instruction
sequence consisting of a conditional jump of the opposite sense around
an unconditional jump to the target.
Following the CPU architecture name, and separated from it by a comma,
you may specify @samp{jumps} or @samp{nojumps} to control automatic
promotion of conditional jumps.
@samp{jumps} is the default, and enables jump promotion: all external
jumps will be of the long variety, and file-local jumps will be promoted
as necessary (@pxref{i386-Jumps}). @samp{nojumps} leaves external
conditional jumps as byte offset jumps, and warns about file-local
conditional jumps that @code{@value{AS}} promotes.
Unconditional jumps are treated as for @samp{jumps}.
For example
@smallexample
.arch i8086,nojumps
@end smallexample
@node i386-Notes @node i386-Notes
@section Notes @section Notes