athlon.md: Fix comment typos.
* config/i386/athlon.md: Fix comment typos. * config/i386/crtdll.h: Likewise. * config/i386/djgpp.h: Likewise. * config/i386/i386-interix.h: Likewise. * config/i386/i386.c: Likewise. * config/i386/i386.h: Likewise. * config/i386/i386.md: Likewise. * config/i386/k6.md: Likewise. * config/i386/mingw32.h: Likewise. * config/i386/pentium.md: Likewise. * config/i386/sco5.h: Likewise. * config/i386/winnt.c: Likewise. * config/i386/xmmintrin.h: Likewise. From-SVN: r60524
This commit is contained in:
parent
eab5474f6d
commit
d1f876538e
|
@ -1,3 +1,19 @@
|
|||
2002-12-26 Kazu Hirata <kazu@cs.umass.edu>
|
||||
|
||||
* config/i386/athlon.md: Fix comment typos.
|
||||
* config/i386/crtdll.h: Likewise.
|
||||
* config/i386/djgpp.h: Likewise.
|
||||
* config/i386/i386-interix.h: Likewise.
|
||||
* config/i386/i386.c: Likewise.
|
||||
* config/i386/i386.h: Likewise.
|
||||
* config/i386/i386.md: Likewise.
|
||||
* config/i386/k6.md: Likewise.
|
||||
* config/i386/mingw32.h: Likewise.
|
||||
* config/i386/pentium.md: Likewise.
|
||||
* config/i386/sco5.h: Likewise.
|
||||
* config/i386/winnt.c: Likewise.
|
||||
* config/i386/xmmintrin.h: Likewise.
|
||||
|
||||
2002-12-26 Jose Renau <renau@cs.uiuc.edu>
|
||||
|
||||
* ssa-dce.c (EXECUTE_IF_UNNECESSARY): Verify INSN is an
|
||||
|
|
|
@ -53,7 +53,7 @@
|
|||
;; is used (this is needed to allow throughput of 1.5 double decoded
|
||||
;; instructions per cycle).
|
||||
;;
|
||||
;; In order to avoid dependnece between reservation of decoder
|
||||
;; In order to avoid dependence between reservation of decoder
|
||||
;; and other units, we model decoder as two stage fully pipelined unit
|
||||
;; and only double decoded instruction may occupy unit in the first cycle.
|
||||
;; With this scheme however two double instructions can be issued cycle0.
|
||||
|
@ -74,7 +74,7 @@
|
|||
| (nothing,(athlon-decode0 + athlon-decode1))
|
||||
| (nothing,(athlon-decode1 + athlon-decode2)))")
|
||||
|
||||
;; Agu and ieu unit results in extremly large automatons and
|
||||
;; Agu and ieu unit results in extremely large automatons and
|
||||
;; in our approximation they are hardly filled in. Only ieu
|
||||
;; unit can, as issue rate is 3 and agu unit is always used
|
||||
;; first in the insn reservations. Skip the models.
|
||||
|
@ -107,7 +107,7 @@
|
|||
(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
|
||||
|
||||
|
||||
;; Jump instructions are executed in the branch unit compltetely transparent to us
|
||||
;; Jump instructions are executed in the branch unit completely transparent to us
|
||||
(define_insn_reservation "athlon_branch" 0
|
||||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(eq_attr "type" "ibr"))
|
||||
|
@ -474,7 +474,7 @@
|
|||
(and (eq_attr "cpu" "athlon,k8")
|
||||
(eq_attr "unit" "mmx"))
|
||||
"athlon-direct,athlon-faddmul")
|
||||
;; SSE operations are handled by the i387 unit as well. The latnecy
|
||||
;; SSE operations are handled by the i387 unit as well. The latency
|
||||
;; is same as for i387 operations for scalar operations
|
||||
(define_insn_reservation "athlon_sselog_load" 6
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* Operating system specific defines to be used when targeting GCC for
|
||||
hosting on Windows32, using GNU tools and the Windows32 API Library.
|
||||
This variant uses CRTDLL.DLL insted of MSVCRTDLL.DLL.
|
||||
This variant uses CRTDLL.DLL instead of MSVCRTDLL.DLL.
|
||||
Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU CC.
|
||||
|
|
|
@ -63,7 +63,7 @@ Boston, MA 02111-1307, USA. */
|
|||
|
||||
/* Define standard DJGPP installation paths. */
|
||||
/* We override default /usr or /usr/local part with /dev/env/DJDIR which */
|
||||
/* points to actual DJGPP instalation directory. */
|
||||
/* points to actual DJGPP installation directory. */
|
||||
|
||||
/* Standard include directory */
|
||||
#undef STANDARD_INCLUDE_DIR
|
||||
|
|
|
@ -36,7 +36,7 @@ Boston, MA 02111-1307, USA. */
|
|||
/* By default, target has a 80387, uses IEEE compatible arithmetic,
|
||||
and returns float values in the 387 and needs stack probes
|
||||
We also align doubles to 64-bits for MSVC default compatibility
|
||||
We do bitfields MSVC-compatably by default, too. */
|
||||
We do bitfields MSVC-compatibly by default, too. */
|
||||
#undef TARGET_SUBTARGET_DEFAULT
|
||||
#define TARGET_SUBTARGET_DEFAULT \
|
||||
(MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE | \
|
||||
|
|
|
@ -503,7 +503,7 @@ const int x86_shift1 = ~m_486;
|
|||
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
|
||||
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
|
||||
/* Set for machines where the type and dependencies are resolved on SSE register
|
||||
parts insetad of whole registers, so we may maintain just lower part of
|
||||
parts instead of whole registers, so we may maintain just lower part of
|
||||
scalar values in proper format leaving the upper part undefined. */
|
||||
const int x86_sse_partial_regs = m_ATHLON_K8;
|
||||
/* Athlon optimizes partial-register FPS special case, thus avoiding the
|
||||
|
@ -514,7 +514,7 @@ const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
|
|||
const int x86_use_ffreep = m_ATHLON_K8;
|
||||
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
|
||||
|
||||
/* In case the avreage insn count for single function invocation is
|
||||
/* In case the average insn count for single function invocation is
|
||||
lower than this constant, emit fast (but longer) prologue and
|
||||
epilogue code. */
|
||||
#define FAST_PROLOGUE_INSN_COUNT 20
|
||||
|
@ -651,8 +651,8 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
|
|||
-1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
|
||||
21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
|
||||
29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
|
||||
};
|
||||
|
||||
/* Test and compare insns in i386.md store the information needed to
|
||||
|
@ -879,9 +879,9 @@ static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
|
|||
/* Register class used for passing given 64bit part of the argument.
|
||||
These represent classes as documented by the PS ABI, with the exception
|
||||
of SSESF, SSEDF classes, that are basically SSE class, just gcc will
|
||||
use SF or DFmode move instead of DImode to avoid reformating penalties.
|
||||
use SF or DFmode move instead of DImode to avoid reformatting penalties.
|
||||
|
||||
Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
|
||||
Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
|
||||
whenever possible (upper half does contain padding).
|
||||
*/
|
||||
enum x86_64_reg_class
|
||||
|
@ -1085,7 +1085,7 @@ override_options ()
|
|||
real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
|
||||
|
||||
/* Set the default values for switches whose default depends on TARGET_64BIT
|
||||
in case they weren't overwriten by command line options. */
|
||||
in case they weren't overwritten by command line options. */
|
||||
if (TARGET_64BIT)
|
||||
{
|
||||
if (flag_omit_frame_pointer == 2)
|
||||
|
@ -1394,7 +1394,7 @@ override_options ()
|
|||
if (TARGET_3DNOW)
|
||||
{
|
||||
target_flags |= MASK_MMX;
|
||||
/* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
|
||||
/* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
|
||||
extensions it adds. */
|
||||
if (x86_3dnow_a & (1 << ix86_arch))
|
||||
target_flags |= MASK_3DNOW_A;
|
||||
|
@ -1810,7 +1810,7 @@ init_cumulative_args (cum, fntype, libname)
|
|||
return;
|
||||
}
|
||||
|
||||
/* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
|
||||
/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
|
||||
of this code is to classify each 8bytes of incoming argument by the register
|
||||
class and assign registers accordingly. */
|
||||
|
||||
|
@ -2274,7 +2274,7 @@ construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regn
|
|||
break;
|
||||
case X86_64_INTEGER_CLASS:
|
||||
case X86_64_INTEGERSI_CLASS:
|
||||
/* Merge TImodes on aligned occassions here too. */
|
||||
/* Merge TImodes on aligned occasions here too. */
|
||||
if (i * 8 + 8 > bytes)
|
||||
tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
|
||||
else if (class[i] == X86_64_INTEGERSI_CLASS)
|
||||
|
@ -2536,8 +2536,8 @@ ix86_function_value (valtype)
|
|||
rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
|
||||
REGPARM_MAX, SSE_REGPARM_MAX,
|
||||
x86_64_int_return_registers, 0);
|
||||
/* For zero sized structures, construct_continer return NULL, but we need
|
||||
to keep rest of compiler happy by returning meaningfull value. */
|
||||
/* For zero sized structures, construct_container return NULL, but we need
|
||||
to keep rest of compiler happy by returning meaningful value. */
|
||||
if (!ret)
|
||||
ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
|
||||
return ret;
|
||||
|
@ -2719,7 +2719,7 @@ ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
|
|||
if (next_cum.sse_nregs)
|
||||
{
|
||||
/* Now emit code to save SSE registers. The AX parameter contains number
|
||||
of SSE parameter regsiters used to call this function. We use
|
||||
of SSE parameter registers used to call this function. We use
|
||||
sse_prologue_save insn template that produces computed jump across
|
||||
SSE saves. We need some preparation work to get this working. */
|
||||
|
||||
|
@ -2885,11 +2885,11 @@ ix86_va_arg (valist, type)
|
|||
need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
|
||||
|| TYPE_ALIGN (type) > 128);
|
||||
|
||||
/* In case we are passing structure, verify that it is consetuctive block
|
||||
/* In case we are passing structure, verify that it is consecutive block
|
||||
on the register save area. If not we need to do moves. */
|
||||
if (!need_temp && !REG_P (container))
|
||||
{
|
||||
/* Verify that all registers are strictly consetuctive */
|
||||
/* Verify that all registers are strictly consecutive */
|
||||
if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
|
||||
{
|
||||
int i;
|
||||
|
@ -3479,7 +3479,7 @@ const248_operand (op, mode)
|
|||
&& (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
|
||||
}
|
||||
|
||||
/* True if this is a constant appropriate for an increment or decremenmt. */
|
||||
/* True if this is a constant appropriate for an increment or decrement. */
|
||||
|
||||
int
|
||||
incdec_operand (op, mode)
|
||||
|
@ -3833,7 +3833,7 @@ ext_register_operand (op, mode)
|
|||
if (!register_operand (op, VOIDmode))
|
||||
return 0;
|
||||
|
||||
/* Be curefull to accept only registers having upper parts. */
|
||||
/* Be careful to accept only registers having upper parts. */
|
||||
regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
|
||||
return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
|
||||
}
|
||||
|
@ -4731,7 +4731,7 @@ ix86_expand_prologue ()
|
|||
is significantly longer, but also executes faster as modern hardware
|
||||
can execute the moves in parallel, but can't do that for push/pop.
|
||||
|
||||
Be curefull about choosing what prologue to emit: When function takes
|
||||
Be careful about choosing what prologue to emit: When function takes
|
||||
many instructions to execute we may use slow version as well as in
|
||||
case function is known to be outside hot spot (this is known with
|
||||
feedback only). Weight the size of function by number of registers
|
||||
|
@ -4834,7 +4834,7 @@ ix86_expand_prologue ()
|
|||
/* Even with accurate pre-reload life analysis, we can wind up
|
||||
deleting all references to the pic register after reload.
|
||||
Consider if cross-jumping unifies two sides of a branch
|
||||
controled by a comparison vs the only read from a global.
|
||||
controlled by a comparison vs the only read from a global.
|
||||
In which case, allow the set_got to be deleted, though we're
|
||||
too late to do anything about the ebx save in the prologue. */
|
||||
REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
|
||||
|
@ -4896,7 +4896,7 @@ ix86_expand_epilogue (style)
|
|||
while this code results in LEAVE instruction (or discrete equivalent),
|
||||
so it is profitable in some other cases as well. Especially when there
|
||||
are no registers to restore. We also use this code when TARGET_USE_LEAVE
|
||||
and there is exactly one register to pop. This heruistic may need some
|
||||
and there is exactly one register to pop. This heuristic may need some
|
||||
tuning in future. */
|
||||
if ((!sp_valid && frame.nregs <= 1)
|
||||
|| (TARGET_EPILOGUE_USING_MOVE
|
||||
|
@ -7080,7 +7080,7 @@ print_operand (file, x, code)
|
|||
int cputaken = final_forward_branch_p (current_output_insn) == 0;
|
||||
|
||||
/* Emit hints only in the case default branch prediction
|
||||
heruistics would fail. */
|
||||
heuristics would fail. */
|
||||
if (taken != cputaken)
|
||||
{
|
||||
/* We use 3e (DS) prefix for taken branches and
|
||||
|
@ -8448,7 +8448,7 @@ ix86_use_fcomi_compare (code)
|
|||
|
||||
/* Swap, force into registers, or otherwise massage the two operands
|
||||
to a fp comparison. The operands are updated in place; the new
|
||||
comparsion code is returned. */
|
||||
comparison code is returned. */
|
||||
|
||||
static enum rtx_code
|
||||
ix86_prepare_fp_compare_args (code, pop0, pop1)
|
||||
|
@ -8663,7 +8663,7 @@ ix86_fp_comparison_fcomi_cost (code)
|
|||
enum rtx_code code;
|
||||
{
|
||||
enum rtx_code bypass_code, first_code, second_code;
|
||||
/* Return arbitarily high cost when instruction is not supported - this
|
||||
/* Return arbitrarily high cost when instruction is not supported - this
|
||||
prevents gcc from using it. */
|
||||
if (!TARGET_CMOVE)
|
||||
return 1024;
|
||||
|
@ -8678,7 +8678,7 @@ ix86_fp_comparison_sahf_cost (code)
|
|||
enum rtx_code code;
|
||||
{
|
||||
enum rtx_code bypass_code, first_code, second_code;
|
||||
/* Return arbitarily high cost when instruction is not preferred - this
|
||||
/* Return arbitrarily high cost when instruction is not preferred - this
|
||||
avoids gcc from using it. */
|
||||
if (!TARGET_USE_SAHF && !optimize_size)
|
||||
return 1024;
|
||||
|
@ -9244,7 +9244,7 @@ ix86_expand_setcc (code, dest)
|
|||
return 1; /* DONE */
|
||||
}
|
||||
|
||||
/* Expand comparison setting or clearing carry flag. Return true when sucesfull
|
||||
/* Expand comparison setting or clearing carry flag. Return true when successful
|
||||
and set pop for the operation. */
|
||||
bool
|
||||
ix86_expand_carry_flag_compare (code, op0, op1, pop)
|
||||
|
@ -9901,7 +9901,7 @@ ix86_expand_fp_movcc (operands)
|
|||
VOIDmode, ix86_compare_op0,
|
||||
ix86_compare_op1);
|
||||
}
|
||||
/* Similary try to manage result to be first operand of conditional
|
||||
/* Similarly try to manage result to be first operand of conditional
|
||||
move. We also don't support the NE comparison on SSE, so try to
|
||||
avoid it. */
|
||||
if ((rtx_equal_p (operands[0], operands[3])
|
||||
|
@ -10645,7 +10645,7 @@ ix86_expand_movstr (dst, src, count_exp, align_exp)
|
|||
able to predict the branches) and also it is friendlier to the
|
||||
hardware branch prediction.
|
||||
|
||||
Using loops is benefical for generic case, because we can
|
||||
Using loops is beneficial for generic case, because we can
|
||||
handle small counts using the loops. Many CPUs (such as Athlon)
|
||||
have large REP prefix setup costs.
|
||||
|
||||
|
@ -11563,7 +11563,7 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
|
|||
rtx set, set2;
|
||||
int dep_insn_code_number;
|
||||
|
||||
/* Anti and output depenancies have zero cost on all CPUs. */
|
||||
/* Anti and output dependencies have zero cost on all CPUs. */
|
||||
if (REG_NOTE_KIND (link) != 0)
|
||||
return 0;
|
||||
|
||||
|
@ -11587,7 +11587,7 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
|
|||
if (ix86_flags_dependant (insn, dep_insn, insn_type))
|
||||
cost = 0;
|
||||
|
||||
/* Floating point stores require value to be ready one cycle ealier. */
|
||||
/* Floating point stores require value to be ready one cycle earlier. */
|
||||
if (insn_type == TYPE_FMOV
|
||||
&& get_attr_memory (insn) == MEMORY_STORE
|
||||
&& !ix86_agi_dependant (insn, dep_insn, insn_type))
|
||||
|
@ -14180,7 +14180,7 @@ ix86_register_move_cost (mode, class1, class2)
|
|||
|
||||
/* In case of copying from general_purpose_register we may emit multiple
|
||||
stores followed by single load causing memory size mismatch stall.
|
||||
Count this as arbitarily high cost of 20. */
|
||||
Count this as arbitrarily high cost of 20. */
|
||||
if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
|
||||
cost += 20;
|
||||
|
||||
|
@ -14438,7 +14438,7 @@ x86_order_regs_for_local_alloc ()
|
|||
for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
|
||||
reg_alloc_order [pos++] = i;
|
||||
|
||||
/* x87 registerts. */
|
||||
/* x87 registers. */
|
||||
if (TARGET_SSE_MATH)
|
||||
for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
|
||||
reg_alloc_order [pos++] = i;
|
||||
|
@ -14713,7 +14713,7 @@ x86_function_profiler (file, labelno)
|
|||
|
||||
/* Implement machine specific optimizations.
|
||||
At the moment we implement single transformation: AMD Athlon works faster
|
||||
when RET is not destination of conditional jump or directly preceeded
|
||||
when RET is not destination of conditional jump or directly preceded
|
||||
by other jump instruction. We avoid the penalty by inserting NOP just
|
||||
before the RET instructions in such cases. */
|
||||
void
|
||||
|
|
|
@ -745,12 +745,12 @@ extern int x86_prefetch_sse;
|
|||
/* Boundary (in *bits*) on which stack pointer should be aligned. */
|
||||
#define STACK_BOUNDARY BITS_PER_WORD
|
||||
|
||||
/* Boundary (in *bits*) on which the stack pointer preferrs to be
|
||||
/* Boundary (in *bits*) on which the stack pointer prefers to be
|
||||
aligned; the compiler cannot rely on having this alignment. */
|
||||
#define PREFERRED_STACK_BOUNDARY ix86_preferred_stack_boundary
|
||||
|
||||
/* As of July 2001, many runtimes do not align the stack properly when
|
||||
entering main. This causes expand_main_function to forcably align
|
||||
entering main. This causes expand_main_function to forcibly align
|
||||
the stack, which results in aligned frames for functions called from
|
||||
main, though it does nothing for the alignment of main itself. */
|
||||
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \
|
||||
|
@ -771,7 +771,7 @@ extern int x86_prefetch_sse;
|
|||
might need to be aligned. No data type wants to be aligned
|
||||
rounder than this.
|
||||
|
||||
Pentium+ preferrs DFmode values to be aligned to 64 bit boundary
|
||||
Pentium+ prefers DFmode values to be aligned to 64 bit boundary
|
||||
and Pentium Pro XFmode values at 128 bit boundaries. */
|
||||
|
||||
#define BIGGEST_ALIGNMENT 128
|
||||
|
@ -781,7 +781,7 @@ extern int x86_prefetch_sse;
|
|||
((MODE) == XFmode || (MODE) == TFmode || SSE_REG_MODE_P (MODE))
|
||||
|
||||
/* The published ABIs say that doubles should be aligned on word
|
||||
boundaries, so lower the aligment for structure fields unless
|
||||
boundaries, so lower the alignment for structure fields unless
|
||||
-malign-double is set. */
|
||||
|
||||
/* ??? Blah -- this macro is used directly by libobjc. Since it
|
||||
|
@ -952,7 +952,7 @@ extern int x86_prefetch_sse;
|
|||
|
||||
/* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
|
||||
to be rearranged based on a particular function. When using sse math,
|
||||
we want to allocase SSE before x87 registers and vice vera. */
|
||||
we want to allocate SSE before x87 registers and vice versa. */
|
||||
|
||||
#define ORDER_REGS_FOR_LOCAL_ALLOC x86_order_regs_for_local_alloc ()
|
||||
|
||||
|
@ -1441,7 +1441,7 @@ enum reg_class
|
|||
K is for signed imm8 operands.
|
||||
L is for andsi as zero-extending move.
|
||||
M is for shifts that can be executed by the "lea" opcode.
|
||||
N is for immedaite operands for out/in instructions (0-255)
|
||||
N is for immediate operands for out/in instructions (0-255)
|
||||
*/
|
||||
|
||||
#define CONST_OK_FOR_LETTER_P(VALUE, C) \
|
||||
|
|
|
@ -1205,7 +1205,7 @@
|
|||
(set_attr "modrm" "0,*,0,*,*,*,*,*,*,*")
|
||||
(set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")])
|
||||
|
||||
;; Stores and loads of ax to arbitary constant address.
|
||||
;; Stores and loads of ax to arbitrary constant address.
|
||||
;; We fake a second form of instruction to force reload to load address
|
||||
;; into register when rax is not available
|
||||
(define_insn "*movabssi_1_rex64"
|
||||
|
@ -1331,7 +1331,7 @@
|
|||
(const_string "HI")))
|
||||
(set_attr "modrm" "0,*,*,0,*,*")])
|
||||
|
||||
;; Stores and loads of ax to arbitary constant address.
|
||||
;; Stores and loads of ax to arbitrary constant address.
|
||||
;; We fake a second form of instruction to force reload to load address
|
||||
;; into register when rax is not available
|
||||
(define_insn "*movabshi_1_rex64"
|
||||
|
@ -1650,7 +1650,7 @@
|
|||
(const_string "SI")
|
||||
(const_string "QI")))])
|
||||
|
||||
;; Stores and loads of ax to arbitary constant address.
|
||||
;; Stores and loads of ax to arbitrary constant address.
|
||||
;; We fake a second form of instruction to force reload to load address
|
||||
;; into register when rax is not available
|
||||
(define_insn "*movabsqi_1_rex64"
|
||||
|
@ -1979,7 +1979,7 @@
|
|||
(set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*")
|
||||
(set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI")])
|
||||
|
||||
;; Stores and loads of ax to arbitary constant address.
|
||||
;; Stores and loads of ax to arbitrary constant address.
|
||||
;; We fake a second form of instruction to force reload to load address
|
||||
;; into register when rax is not available
|
||||
(define_insn "*movabsdi_1_rex64"
|
||||
|
@ -2256,7 +2256,7 @@
|
|||
"ix86_expand_move (DFmode, operands); DONE;")
|
||||
|
||||
;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
|
||||
;; Size of pushdf using integer insturctions is 2+2*memory operand size
|
||||
;; Size of pushdf using integer instructions is 2+2*memory operand size
|
||||
;; On the average, pushdf using integers can be still shorter. Allow this
|
||||
;; pattern for optimize_size too.
|
||||
|
||||
|
@ -2418,7 +2418,7 @@
|
|||
(const_int 0))
|
||||
(const_string "V2DF")]
|
||||
(const_string "DF"))
|
||||
/* For achitectures resolving dependencies on register
|
||||
/* For architectures resolving dependencies on register
|
||||
parts we may avoid extra work to zero out upper part
|
||||
of register. */
|
||||
(eq_attr "alternative" "7")
|
||||
|
@ -2538,7 +2538,7 @@
|
|||
(const_int 0))
|
||||
(const_string "V2DF")]
|
||||
(const_string "DF"))
|
||||
/* For achitectures resolving dependencies on register
|
||||
/* For architectures resolving dependencies on register
|
||||
parts we may avoid extra work to zero out upper part
|
||||
of register. */
|
||||
(eq_attr "alternative" "7")
|
||||
|
@ -2591,7 +2591,7 @@
|
|||
"ix86_expand_move (TFmode, operands); DONE;")
|
||||
|
||||
;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
|
||||
;; Size of pushdf using integer insturctions is 3+3*memory operand size
|
||||
;; Size of pushdf using integer instructions is 3+3*memory operand size
|
||||
;; Pushing using integer instructions is longer except for constants
|
||||
;; and direct memory references.
|
||||
;; (assuming that any given constant is pushed only once, but this ought to be
|
||||
|
@ -3930,7 +3930,7 @@
|
|||
[(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
|
||||
"")
|
||||
|
||||
; Avoid possible reformating penalty on the destination by first
|
||||
; Avoid possible reformatting penalty on the destination by first
|
||||
; zeroing it out
|
||||
(define_split
|
||||
[(set (match_operand:SF 0 "register_operand" "")
|
||||
|
@ -4683,7 +4683,7 @@
|
|||
(set_attr "mode" "SF")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
; Avoid possible reformating penalty on the destination by first
|
||||
; Avoid possible reformatting penalty on the destination by first
|
||||
; zeroing it out
|
||||
(define_split
|
||||
[(set (match_operand:SF 0 "register_operand" "")
|
||||
|
@ -4737,7 +4737,7 @@
|
|||
(set_attr "mode" "SF")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
; Avoid possible reformating penalty on the destination by first
|
||||
; Avoid possible reformatting penalty on the destination by first
|
||||
; zeroing it out
|
||||
(define_split
|
||||
[(set (match_operand:SF 0 "register_operand" "")
|
||||
|
@ -7420,7 +7420,7 @@
|
|||
"")
|
||||
|
||||
;; Allow the parameter to come in eax or edx to avoid extra moves.
|
||||
;; Penalize eax case sligthly because it results in worse scheduling
|
||||
;; Penalize eax case slightly because it results in worse scheduling
|
||||
;; of code.
|
||||
(define_insn "*divmoddi4_nocltd_rex64"
|
||||
[(set (match_operand:DI 0 "register_operand" "=&a,?a")
|
||||
|
@ -7505,7 +7505,7 @@
|
|||
"")
|
||||
|
||||
;; Allow the parameter to come in eax or edx to avoid extra moves.
|
||||
;; Penalize eax case sligthly because it results in worse scheduling
|
||||
;; Penalize eax case slightly because it results in worse scheduling
|
||||
;; of code.
|
||||
(define_insn "*divmodsi4_nocltd"
|
||||
[(set (match_operand:SI 0 "register_operand" "=&a,?a")
|
||||
|
@ -7978,7 +7978,7 @@
|
|||
;; Convert HImode/SImode test instructions with immediate to QImode ones.
|
||||
;; i386 does not allow to encode test with 8bit sign extended immediate, so
|
||||
;; this is relatively important trick.
|
||||
;; Do the converison only post-reload to avoid limiting of the register class
|
||||
;; Do the conversion only post-reload to avoid limiting of the register class
|
||||
;; to QI regs.
|
||||
(define_split
|
||||
[(set (reg 17)
|
||||
|
@ -8442,7 +8442,7 @@
|
|||
|
||||
;; Convert wide AND instructions with immediate operand to shorter QImode
|
||||
;; equivalents when possible.
|
||||
;; Don't do the splitting with memory operands, since it intoduces risc
|
||||
;; Don't do the splitting with memory operands, since it introduces risk
|
||||
;; of memory mismatch stalls. We may want to do the splitting for optimizing
|
||||
;; for size, but that can (should?) be handled by generic code instead.
|
||||
(define_split
|
||||
|
@ -9733,7 +9733,7 @@
|
|||
operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
|
||||
operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0);
|
||||
operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0);
|
||||
/* Avoid possible reformating on the operands. */
|
||||
/* Avoid possible reformatting on the operands. */
|
||||
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
|
||||
emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
|
||||
if (operands_match_p (operands[0], operands[2]))
|
||||
|
@ -9862,7 +9862,7 @@
|
|||
operands[0] = gen_rtx_REG (SImode,
|
||||
true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));")
|
||||
|
||||
;; Conditionize these after reload. If they matches before reload, we
|
||||
;; Conditionalize these after reload. If they match before reload, we
|
||||
;; lose the clobber and ability to use integer instructions.
|
||||
|
||||
(define_insn "*negsf2_1"
|
||||
|
@ -10194,7 +10194,7 @@
|
|||
operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
|
||||
operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0);
|
||||
operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0);
|
||||
/* Avoid possible reformating on the operands. */
|
||||
/* Avoid possible reformatting on the operands. */
|
||||
if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
|
||||
emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
|
||||
if (operands_match_p (operands[0], operands[2]))
|
||||
|
@ -12928,7 +12928,7 @@
|
|||
;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
|
||||
;; subsequent logical operations are used to imitate conditional moves.
|
||||
;; 0xffffffff is NaN, but not in normalized form, so we can't represent
|
||||
;; it directly. Futher holding this value in pseudo register might bring
|
||||
;; it directly. Further holding this value in pseudo register might bring
|
||||
;; problem in implicit normalization in spill code.
|
||||
;; So we don't define FLOAT_STORE_FLAG_VALUE and create these
|
||||
;; instructions after reload by splitting the conditional move patterns.
|
||||
|
@ -14002,7 +14002,7 @@
|
|||
emit_move_insn (operands[0], out);
|
||||
}
|
||||
|
||||
/* Pentium bsf instruction is extremly slow. The following code is
|
||||
/* Pentium bsf instruction is extremely slow. The following code is
|
||||
recommended by the Intel Optimizing Manual as a reasonable replacement:
|
||||
TEST EAX,EAX
|
||||
JZ SHORT BS2
|
||||
|
@ -16941,7 +16941,7 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; Split SSE based conditional move into seqence:
|
||||
;; Split SSE based conditional move into sequence:
|
||||
;; cmpCC op0, op4 - set op0 to 0 or ffffffff depending on the comparison
|
||||
;; and op2, op0 - zero op2 if comparison was false
|
||||
;; nand op0, op3 - load op3 to op0 if comparison was false
|
||||
|
@ -16983,7 +16983,7 @@
|
|||
operands[6] = operands[2], operands[7] = operands[4];
|
||||
})
|
||||
|
||||
;; Special case of conditional move we can handle effectivly.
|
||||
;; Special case of conditional move we can handle effectively.
|
||||
;; Do not bother with the integer/floating point case, since these are
|
||||
;; both considerably slower, unlike in the generic case.
|
||||
(define_insn "*sse_movsfcc_const0_1"
|
||||
|
@ -17432,7 +17432,7 @@
|
|||
|
||||
;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
|
||||
;; Don't split NOTs with a displacement operand, because resulting XOR
|
||||
;; will not be pariable anyway.
|
||||
;; will not be pairable anyway.
|
||||
;;
|
||||
;; On AMD K6, NOT is vector decoded with memory operand that can not be
|
||||
;; represented using a modRM byte. The XOR replacement is long decoded,
|
||||
|
@ -19083,14 +19083,14 @@
|
|||
;; on integral types. We deal with this by representing the floating point
|
||||
;; logical as logical on arguments casted to TImode as this is what hardware
|
||||
;; really does. Unfortunately hardware requires the type information to be
|
||||
;; present and thus we must avoid subregs from being simplified and elliminated
|
||||
;; present and thus we must avoid subregs from being simplified and eliminated
|
||||
;; in later compilation phases.
|
||||
;;
|
||||
;; We have following variants from each instruction:
|
||||
;; sse_andsf3 - the operation taking V4SF vector operands
|
||||
;; and doing TImode cast on them
|
||||
;; *sse_andsf3_memory - the operation taking one memory operand casted to
|
||||
;; TImode, since backend insist on elliminating casts
|
||||
;; TImode, since backend insist on eliminating casts
|
||||
;; on memory operands
|
||||
;; sse_andti3_sf_1 - the operation taking SF scalar operands.
|
||||
;; We can not accept memory operand here as instruction reads
|
||||
|
@ -19098,7 +19098,7 @@
|
|||
;; scalar float operations that expands to logicals (fabs)
|
||||
;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode
|
||||
;; memory operand. Eventually combine can be able
|
||||
;; to synthetize these using splitter.
|
||||
;; to synthesize these using splitter.
|
||||
;; sse2_anddf3, *sse2_anddf3_memory
|
||||
;;
|
||||
;;
|
||||
|
@ -20691,7 +20691,7 @@
|
|||
[(set_attr "type" "mmxshft")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; 3DNow reciprical and sqrt
|
||||
;; 3DNow reciprocal and sqrt
|
||||
|
||||
(define_insn "pfrcpv2sf2"
|
||||
[(set (match_operand:V2SF 0 "register_operand" "=y")
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
;; fpu describes FPU unit
|
||||
;; load describes load unit.
|
||||
;; branch describes branch unit.
|
||||
;; store decsribes store unit. This unit is not modelled completely and only
|
||||
;; store describes store unit. This unit is not modelled completely and only
|
||||
;; used to model lea operation. Otherwise it lies outside of the critical
|
||||
;; path.
|
||||
;;
|
||||
|
|
|
@ -32,7 +32,7 @@ Boston, MA 02111-1307, USA. */
|
|||
|
||||
#define TARGET_EXECUTABLE_SUFFIX ".exe"
|
||||
|
||||
/* See i386/crtdll.h for an altervative definition. */
|
||||
/* See i386/crtdll.h for an alternative definition. */
|
||||
#define EXTRA_OS_CPP_BUILTINS() \
|
||||
do \
|
||||
{ \
|
||||
|
@ -102,7 +102,7 @@ Boston, MA 02111-1307, USA. */
|
|||
#define MATH_LIBRARY ""
|
||||
|
||||
/* Output STRING, a string representing a filename, to FILE.
|
||||
We canonicalize it to be in Unix format (backslashe are replaced
|
||||
We canonicalize it to be in Unix format (backslashes are replaced
|
||||
by forward slashes). */
|
||||
#undef OUTPUT_QUOTED_STRING
|
||||
#define OUTPUT_QUOTED_STRING(FILE, STRING) \
|
||||
|
@ -129,6 +129,6 @@ do { \
|
|||
putc ('\"', asm_file); \
|
||||
} while (0)
|
||||
|
||||
/* Define as short unsigned for compatability with MS runtime. */
|
||||
/* Define as short unsigned for compatibility with MS runtime. */
|
||||
#undef WINT_TYPE
|
||||
#define WINT_TYPE "short unsigned int"
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
;; while MMX Pentium can slot it on either U or V. Model non-MMX Pentium
|
||||
;; rules, because it results in noticeably better code on non-MMX Pentium
|
||||
;; and doesn't hurt much on MMX. (Prefixed instructions are not very
|
||||
;; common, so the scheduler usualy has a non-prefixed insn to pair).
|
||||
;; common, so the scheduler usually has a non-prefixed insn to pair).
|
||||
|
||||
(define_attr "pent_pair" "uv,pu,pv,np"
|
||||
(cond [(eq_attr "imm_disp" "true")
|
||||
|
@ -71,7 +71,7 @@
|
|||
(define_automaton "pentium,pentium_fpu")
|
||||
|
||||
;; Pentium does have U and V pipes. Instructions to both pipes
|
||||
;; are alwyas issued together, much like on VLIW.
|
||||
;; are always issued together, much like on VLIW.
|
||||
;;
|
||||
;; predecode
|
||||
;; / \
|
||||
|
|
|
@ -72,7 +72,7 @@ Boston, MA 02111-1307, USA. */
|
|||
#define EH_FRAME_SECTION_NAME \
|
||||
((TARGET_ELF) ? EH_FRAME_SECTION_NAME_ELF : EH_FRAME_SECTION_NAME_COFF)
|
||||
|
||||
/* Avoid problems (long sectino names, forward assembler refs) with DWARF
|
||||
/* Avoid problems (long section names, forward assembler refs) with DWARF
|
||||
exception unwinding when we're generating COFF */
|
||||
#define DWARF2_UNWIND_INFO \
|
||||
((TARGET_ELF) ? 1 : 0 )
|
||||
|
|
|
@ -579,7 +579,7 @@ i386_pe_unique_section (decl, reloc)
|
|||
If the section has already been defined, do not allow it to have
|
||||
different attributes, as (1) this is ambiguous since we're not seeing
|
||||
all the declarations up front and (2) some assemblers (e.g. SVR4)
|
||||
do not recoginize section redefinitions. */
|
||||
do not recognize section redefinitions. */
|
||||
/* ??? This differs from the "standard" PE implementation in that we
|
||||
handle the SHARED variable attribute. Should this be done for all
|
||||
PE targets? */
|
||||
|
|
|
@ -37,10 +37,10 @@
|
|||
/* We need type definitions from the MMX header file. */
|
||||
#include <mmintrin.h>
|
||||
|
||||
/* The data type indended for user use. */
|
||||
/* The data type intended for user use. */
|
||||
typedef int __m128 __attribute__ ((__mode__(__V4SF__)));
|
||||
|
||||
/* Internal data types for implementing the instrinsics. */
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
typedef int __v4sf __attribute__ ((__mode__(__V4SF__)));
|
||||
typedef int __v4si __attribute__ ((__mode__(__V4SI__)));
|
||||
|
||||
|
@ -1047,7 +1047,7 @@ _mm_stream_ps (float *__P, __m128 __A)
|
|||
__builtin_ia32_movntps (__P, (__v4sf)__A);
|
||||
}
|
||||
|
||||
/* Guarantees that every preceeding store is globally visible before
|
||||
/* Guarantees that every preceding store is globally visible before
|
||||
any subsequent store. */
|
||||
static __inline void
|
||||
_mm_sfence (void)
|
||||
|
@ -1114,21 +1114,21 @@ _mm_load_pd1 (double const *__P)
|
|||
return _mm_load1_pd (__P);
|
||||
}
|
||||
|
||||
/* Load two DPFP values from P. The addresd must be 16-byte aligned. */
|
||||
/* Load two DPFP values from P. The address must be 16-byte aligned. */
|
||||
static __inline __m128d
|
||||
_mm_load_pd (double const *__P)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_loadapd (__P);
|
||||
}
|
||||
|
||||
/* Load two DPFP values from P. The addresd need not be 16-byte aligned. */
|
||||
/* Load two DPFP values from P. The address need not be 16-byte aligned. */
|
||||
static __inline __m128d
|
||||
_mm_loadu_pd (double const *__P)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_loadupd (__P);
|
||||
}
|
||||
|
||||
/* Load two DPFP values in reverse order. The addresd must be aligned. */
|
||||
/* Load two DPFP values in reverse order. The address must be aligned. */
|
||||
static __inline __m128d
|
||||
_mm_loadr_pd (double const *__P)
|
||||
{
|
||||
|
@ -1208,21 +1208,21 @@ _mm_store_pd1 (double *__P, __m128d __A)
|
|||
_mm_store1_pd (__P, __A);
|
||||
}
|
||||
|
||||
/* Store two DPFP values. The addresd must be 16-byte aligned. */
|
||||
/* Store two DPFP values. The address must be 16-byte aligned. */
|
||||
static __inline void
|
||||
_mm_store_pd (double *__P, __m128d __A)
|
||||
{
|
||||
__builtin_ia32_storeapd (__P, (__v2df)__A);
|
||||
}
|
||||
|
||||
/* Store two DPFP values. The addresd need not be 16-byte aligned. */
|
||||
/* Store two DPFP values. The address need not be 16-byte aligned. */
|
||||
static __inline void
|
||||
_mm_storeu_pd (double *__P, __m128d __A)
|
||||
{
|
||||
__builtin_ia32_storeupd (__P, (__v2df)__A);
|
||||
}
|
||||
|
||||
/* Store two DPFP values in reverse order. The addresd must be aligned. */
|
||||
/* Store two DPFP values in reverse order. The address must be aligned. */
|
||||
static __inline void
|
||||
_mm_storer_pd (double *__P, __m128d __A)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue